/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vsin.S"

#include "libm.h"

! Read-only constant table used by __vsin.
!
! Each entry is a pair of 32-bit words (8 bytes).  The code loads
! entries by byte offset from "constants" (for example
! ldd [%g1+f30val],%f30), using the offset names #defined after this
! table.  The trailing comment on each entry gives its byte offset
! and the matching offset name.
!
! Keep exactly one statement per line: a "!" comment runs to the end
! of the physical line, so any .word placed after one on the same line
! would be treated as comment text and dropped from the table.

	RO_DATA
	.align	64
constants:
	.word	0x3ec718e3,0xa6972785	! 0x00 p4
	.word	0x3ef9fd39,0x94293940	! 0x08 q4
	.word	0xbf2a019f,0x75ee4be1	! 0x10 p3
	.word	0xbf56c16b,0xba552569	! 0x18 q3
	.word	0x3f811111,0x1108c703	! 0x20 p2
	.word	0x3fa55555,0x554f5b35	! 0x28 q2
	.word	0xbfc55555,0x555554d0	! 0x30 p1
	.word	0xbfdfffff,0xffffff85	! 0x38 q1
	.word	0x3ff00000,0x00000000	! 0x40 one
	.word	0xbfc55555,0x5551fc28	! 0x48 pp1
	.word	0x3f811107,0x62eacc9d	! 0x50 pp2
	.word	0xbfdfffff,0xffff6328	! 0x58 qq1
	.word	0x3fa55551,0x5f7acf0c	! 0x60 qq2
	.word	0x3fe45f30,0x6dc9c883	! 0x68 invpio2
	.word	0x43380000,0x00000000	! 0x70 round
	.word	0x3ff921fb,0x54400000	! 0x78 pio2_1
	.word	0x3dd0b461,0x1a600000	! 0x80 pio2_2
	.word	0x3ba3198a,0x2e000000	! 0x88 pio2_3
	.word	0x397b839a,0x252049c1	! 0x90 pio2_3t
	.word	0x80000000,0x00004000	! 0x98 f30val
! N.B.: in the three entries below the low-order words are used for
! sign bit hacking; see references to "thresh" below.  The code
! indexes this region as thresh-4, thresh and thresh+4, so the order
! and word layout of these entries must not change.
	.word	0xffff8000,0x00000000	! 0xa0 mask
	.word	0x3fc90000,0x80000000	! 0xa8 thresh
	.word	0x3fc40000,0x00000000	! 0xb0 (no offset name of its own)
references to "thresh" below 59 60#define p4 0x0 61#define q4 0x08 62#define p3 0x10 63#define q3 0x18 64#define p2 0x20 65#define q2 0x28 66#define p1 0x30 67#define q1 0x38 68#define one 0x40 69#define pp1 0x48 70#define pp2 0x50 71#define qq1 0x58 72#define qq2 0x60 73#define invpio2 0x68 74#define round 0x70 75#define pio2_1 0x78 76#define pio2_2 0x80 77#define pio2_3 0x88 78#define pio2_3t 0x90 79#define f30val 0x98 80#define mask 0xa0 81#define thresh 0xa8 82 83! local storage indices 84 85#define xsave STACK_BIAS-0x8 86#define ysave STACK_BIAS-0x10 87#define nsave STACK_BIAS-0x14 88#define sxsave STACK_BIAS-0x18 89#define sysave STACK_BIAS-0x1c 90#define biguns STACK_BIAS-0x20 91#define n2 STACK_BIAS-0x24 92#define n1 STACK_BIAS-0x28 93#define n0 STACK_BIAS-0x2c 94#define x2_1 STACK_BIAS-0x40 95#define x1_1 STACK_BIAS-0x50 96#define x0_1 STACK_BIAS-0x60 97#define y2_0 STACK_BIAS-0x70 98#define y1_0 STACK_BIAS-0x80 99#define y0_0 STACK_BIAS-0x90 100! sizeof temp storage - must be a multiple of 16 for V9 101#define tmps 0x90 102 103!-------------------------------------------------------------- 104! Some defines to keep code more readable 105#define LIM_l6 %l6 106! in primary range, contains |x| upper limit when cos(x)=1. 107! in transferring to medium range, denotes what loop was active. 108!-------------------------------------------------------------- 109 110 ENTRY(__vsin) 111 save %sp,-SA(MINFRAME)-tmps,%sp 112 PIC_SETUP(g5) 113 PIC_SET(g5,__vlibm_TBL_sincos_hi,l3) 114 PIC_SET(g5,__vlibm_TBL_sincos_lo,l4) 115 PIC_SET(g5,constants,l5) 116 mov %l5,%g1 117 wr %g0,0x82,%asi ! set %asi for non-faulting loads 118 119! ========== primary range ========== 120 121! register use 122 123! i0 n 124! i1 x 125! i2 stridex 126! i3 y 127! i4 stridey 128! i5 0x80000000 129 130! l0 hx0 131! l1 hx1 132! l2 hx2 133! l3 __vlibm_TBL_sincos_hi 134! l4 __vlibm_TBL_sincos_lo 135! l5 0x3fc90000 136! l6 0x3e400000 137! l7 0x3fe921fb 138 139! 
the following are 64-bit registers in both V8+ and V9 140 141! g1 scratch 142! g5 143 144! o0 py0 145! o1 py1 146! o2 py2 147! o3 oy0 148! o4 oy1 149! o5 oy2 150! o7 scratch 151 152! f0 x0 153! f2 154! f4 155! f6 156! f8 scratch for table base 157! f9 signbit0 158! f10 x1 159! f12 160! f14 161! f16 162! f18 scratch for table base 163! f19 signbit1 164! f20 x2 165! f22 166! f24 167! f26 168! f28 scratch for table base 169! f29 signbit2 170! f30 0x80000000 171! f31 0x4000 172! f32 173! f34 174! f36 175! f38 176! f40 177! f42 178! f44 0xffff800000000000 179! f46 p1 180! f48 p2 181! f50 p3 182! f52 p4 183! f54 one 184! f56 pp1 185! f58 pp2 186! f60 qq1 187! f62 qq2 188 189#ifdef __sparcv9 190 stx %i1,[%fp+xsave] ! save arguments 191 stx %i3,[%fp+ysave] 192#else 193 st %i1,[%fp+xsave] ! save arguments 194 st %i3,[%fp+ysave] 195#endif 196 st %i0,[%fp+nsave] 197 st %i2,[%fp+sxsave] 198 st %i4,[%fp+sysave] 199 sethi %hi(0x80000000),%i5 ! load/set up constants 200 sethi %hi(0x3fc90000),%l5 201 sethi %hi(0x3e400000),LIM_l6 202 sethi %hi(0x3fe921fb),%l7 203 or %l7,%lo(0x3fe921fb),%l7 204 ldd [%g1+f30val],%f30 205 ldd [%g1+mask],%f44 206 ldd [%g1+p1],%f46 207 ldd [%g1+p2],%f48 208 ldd [%g1+p3],%f50 209 ldd [%g1+p4],%f52 210 ldd [%g1+one],%f54 211 ldd [%g1+pp1],%f56 212 ldd [%g1+pp2],%f58 213 ldd [%g1+qq1],%f60 214 ldd [%g1+qq2],%f62 215 sll %i2,3,%i2 ! scale strides 216 sll %i4,3,%i4 217 add %fp,x0_1,%o3 ! precondition loop 218 add %fp,x0_1,%o4 219 add %fp,x0_1,%o5 220 ld [%i1],%l0 ! hx = *x 221 ld [%i1],%f0 222 ld [%i1+4],%f1 223 andn %l0,%i5,%l0 ! hx &= ~0x80000000 224 add %i1,%i2,%i1 ! x += stridex 225 226 ba,pt %icc,.loop0 227! delay slot 228 nop 229 230 .align 32 231.loop0: 232 lda [%i1]%asi,%l1 ! preload next argument 233 sub %l0,LIM_l6,%g1 234 sub %l7,%l0,%o7 235 fands %f0,%f30,%f9 ! save signbit 236 237 lda [%i1]%asi,%f10 238 orcc %o7,%g1,%g0 239 mov %i3,%o0 ! py0 = y 240 bl,pn %icc,.range0 ! if hx < 0x3e400000 or > 0x3fe921fb 241 242! 
delay slot 243 lda [%i1+4]%asi,%f11 244 addcc %i0,-1,%i0 245 add %i3,%i4,%i3 ! y += stridey 246 ble,pn %icc,.endloop1 247 248! delay slot 249 andn %l1,%i5,%l1 250 add %i1,%i2,%i1 ! x += stridex 251 fabsd %f0,%f0 252 fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only 253 254.loop1: 255 lda [%i1]%asi,%l2 ! preload next argument 256 sub %l1,LIM_l6,%g1 257 sub %l7,%l1,%o7 258 fands %f10,%f30,%f19 ! save signbit 259 260 lda [%i1]%asi,%f20 261 orcc %o7,%g1,%g0 262 mov %i3,%o1 ! py1 = y 263 bl,pn %icc,.range1 ! if hx < 0x3e400000 or > 0x3fe921fb 264 265! delay slot 266 lda [%i1+4]%asi,%f21 267 addcc %i0,-1,%i0 268 add %i3,%i4,%i3 ! y += stridey 269 ble,pn %icc,.endloop2 270 271! delay slot 272 andn %l2,%i5,%l2 273 add %i1,%i2,%i1 ! x += stridex 274 fabsd %f10,%f10 275 fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only 276 277.loop2: 278 st %f6,[%o3] 279 sub %l2,LIM_l6,%g1 280 sub %l7,%l2,%o7 281 fands %f20,%f30,%f29 ! save signbit 282 283 st %f7,[%o3+4] 284 orcc %g1,%o7,%g0 285 mov %i3,%o2 ! py2 = y 286 bl,pn %icc,.range2 ! if hx < 0x3e400000 or > 0x3fe921fb 287 288! delay slot 289 add %i3,%i4,%i3 ! y += stridey 290 cmp %l0,%l5 291 fabsd %f20,%f20 292 bl,pn %icc,.case4 293 294! delay slot 295 st %f16,[%o4] 296 cmp %l1,%l5 297 fpadd32s %f0,%f31,%f8 298 bl,pn %icc,.case2 299 300! delay slot 301 st %f17,[%o4+4] 302 cmp %l2,%l5 303 fpadd32s %f10,%f31,%f18 304 bl,pn %icc,.case1 305 306! 
delay slot 307 st %f26,[%o5] 308 mov %o0,%o3 309 sethi %hi(0x3fc3c000),%o7 310 fpadd32s %f20,%f31,%f28 311 312 st %f27,[%o5+4] 313 fand %f8,%f44,%f2 314 mov %o1,%o4 315 316 fand %f18,%f44,%f12 317 mov %o2,%o5 318 sub %l0,%o7,%l0 319 320 fand %f28,%f44,%f22 321 sub %l1,%o7,%l1 322 sub %l2,%o7,%l2 323 324 fsubd %f0,%f2,%f0 325 srl %l0,10,%l0 326 add %l3,8,%g1 327 328 fsubd %f10,%f12,%f10 329 srl %l1,10,%l1 330 331 fsubd %f20,%f22,%f20 332 srl %l2,10,%l2 333 334 fmuld %f0,%f0,%f2 335 andn %l0,0x1f,%l0 336 337 fmuld %f10,%f10,%f12 338 andn %l1,0x1f,%l1 339 340 fmuld %f20,%f20,%f22 341 andn %l2,0x1f,%l2 342 343 fmuld %f2,%f58,%f6 344 ldd [%l3+%l0],%f32 345 346 fmuld %f12,%f58,%f16 347 ldd [%l3+%l1],%f36 348 349 fmuld %f22,%f58,%f26 350 ldd [%l3+%l2],%f40 351 352 faddd %f6,%f56,%f6 353 fmuld %f2,%f62,%f4 354 ldd [%g1+%l0],%f34 355 356 faddd %f16,%f56,%f16 357 fmuld %f12,%f62,%f14 358 ldd [%g1+%l1],%f38 359 360 faddd %f26,%f56,%f26 361 fmuld %f22,%f62,%f24 362 ldd [%g1+%l2],%f42 363 364 fmuld %f2,%f6,%f6 365 faddd %f4,%f60,%f4 366 367 fmuld %f12,%f16,%f16 368 faddd %f14,%f60,%f14 369 370 fmuld %f22,%f26,%f26 371 faddd %f24,%f60,%f24 372 373 faddd %f6,%f54,%f6 374 fmuld %f2,%f4,%f4 375 376 faddd %f16,%f54,%f16 377 fmuld %f12,%f14,%f14 378 379 faddd %f26,%f54,%f26 380 fmuld %f22,%f24,%f24 381 382 fmuld %f0,%f6,%f6 383 ldd [%l4+%l0],%f2 384 385 fmuld %f10,%f16,%f16 386 ldd [%l4+%l1],%f12 387 388 fmuld %f20,%f26,%f26 389 ldd [%l4+%l2],%f22 390 391 fmuld %f4,%f32,%f4 392 lda [%i1]%asi,%l0 ! preload next argument 393 394 fmuld %f14,%f36,%f14 395 lda [%i1]%asi,%f0 396 397 fmuld %f24,%f40,%f24 398 lda [%i1+4]%asi,%f1 399 400 fmuld %f6,%f34,%f6 401 add %i1,%i2,%i1 ! 
x += stridex 402 403 fmuld %f16,%f38,%f16 404 405 fmuld %f26,%f42,%f26 406 407 faddd %f6,%f4,%f6 408 409 faddd %f16,%f14,%f16 410 411 faddd %f26,%f24,%f26 412 413 faddd %f6,%f2,%f6 414 415 faddd %f16,%f12,%f16 416 417 faddd %f26,%f22,%f26 418 419 faddd %f6,%f32,%f6 420 421 faddd %f16,%f36,%f16 422 423 faddd %f26,%f40,%f26 424 andn %l0,%i5,%l0 ! hx &= ~0x80000000 425 426 fors %f6,%f9,%f6 427 addcc %i0,-1,%i0 428 429 fors %f16,%f19,%f16 430 bg,pt %icc,.loop0 431 432! delay slot 433 fors %f26,%f29,%f26 434 435 ba,pt %icc,.endloop0 436! delay slot 437 nop 438 439 .align 32 440.case1: 441 st %f27,[%o5+4] 442 sethi %hi(0x3fc3c000),%o7 443 add %l3,8,%g1 444 fand %f8,%f44,%f2 445 446 sub %l0,%o7,%l0 447 sub %l1,%o7,%l1 448 fand %f18,%f44,%f12 449 fmuld %f20,%f20,%f22 450 451 fsubd %f0,%f2,%f0 452 srl %l0,10,%l0 453 mov %o0,%o3 454 455 fsubd %f10,%f12,%f10 456 srl %l1,10,%l1 457 mov %o1,%o4 458 459 fmuld %f22,%f52,%f24 460 mov %o2,%o5 461 462 fmuld %f0,%f0,%f2 463 andn %l0,0x1f,%l0 464 465 fmuld %f10,%f10,%f12 466 andn %l1,0x1f,%l1 467 468 faddd %f24,%f50,%f24 469 470 fmuld %f2,%f58,%f6 471 ldd [%l3+%l0],%f32 472 473 fmuld %f12,%f58,%f16 474 ldd [%l3+%l1],%f36 475 476 fmuld %f22,%f24,%f24 477 478 faddd %f6,%f56,%f6 479 fmuld %f2,%f62,%f4 480 ldd [%g1+%l0],%f34 481 482 faddd %f16,%f56,%f16 483 fmuld %f12,%f62,%f14 484 ldd [%g1+%l1],%f38 485 486 faddd %f24,%f48,%f24 487 488 fmuld %f2,%f6,%f6 489 faddd %f4,%f60,%f4 490 491 fmuld %f12,%f16,%f16 492 faddd %f14,%f60,%f14 493 494 fmuld %f22,%f24,%f24 495 496 faddd %f6,%f54,%f6 497 fmuld %f2,%f4,%f4 498 499 faddd %f16,%f54,%f16 500 fmuld %f12,%f14,%f14 501 502 faddd %f24,%f46,%f24 503 504 fmuld %f0,%f6,%f6 505 ldd [%l4+%l0],%f2 506 507 fmuld %f10,%f16,%f16 508 ldd [%l4+%l1],%f12 509 510 fmuld %f4,%f32,%f4 511 lda [%i1]%asi,%l0 ! preload next argument 512 513 fmuld %f14,%f36,%f14 514 lda [%i1]%asi,%f0 515 516 fmuld %f6,%f34,%f6 517 lda [%i1+4]%asi,%f1 518 519 fmuld %f16,%f38,%f16 520 add %i1,%i2,%i1 ! 
x += stridex 521 522 fmuld %f22,%f24,%f24 523 524 faddd %f6,%f4,%f6 525 526 faddd %f16,%f14,%f16 527 528 fmuld %f20,%f24,%f24 529 530 faddd %f6,%f2,%f6 531 532 faddd %f16,%f12,%f16 533 534 faddd %f20,%f24,%f26 535 536 faddd %f6,%f32,%f6 537 538 faddd %f16,%f36,%f16 539 andn %l0,%i5,%l0 ! hx &= ~0x80000000 540 541 fors %f26,%f29,%f26 542 addcc %i0,-1,%i0 543 544 fors %f6,%f9,%f6 545 bg,pt %icc,.loop0 546 547! delay slot 548 fors %f16,%f19,%f16 549 550 ba,pt %icc,.endloop0 551! delay slot 552 nop 553 554 .align 32 555.case2: 556 st %f26,[%o5] 557 cmp %l2,%l5 558 fpadd32s %f20,%f31,%f28 559 bl,pn %icc,.case3 560 561! delay slot 562 st %f27,[%o5+4] 563 sethi %hi(0x3fc3c000),%o7 564 add %l3,8,%g1 565 fand %f8,%f44,%f2 566 567 sub %l0,%o7,%l0 568 sub %l2,%o7,%l2 569 fand %f28,%f44,%f22 570 fmuld %f10,%f10,%f12 571 572 fsubd %f0,%f2,%f0 573 srl %l0,10,%l0 574 mov %o0,%o3 575 576 fsubd %f20,%f22,%f20 577 srl %l2,10,%l2 578 mov %o2,%o5 579 580 fmuld %f12,%f52,%f14 581 mov %o1,%o4 582 583 fmuld %f0,%f0,%f2 584 andn %l0,0x1f,%l0 585 586 fmuld %f20,%f20,%f22 587 andn %l2,0x1f,%l2 588 589 faddd %f14,%f50,%f14 590 591 fmuld %f2,%f58,%f6 592 ldd [%l3+%l0],%f32 593 594 fmuld %f22,%f58,%f26 595 ldd [%l3+%l2],%f40 596 597 fmuld %f12,%f14,%f14 598 599 faddd %f6,%f56,%f6 600 fmuld %f2,%f62,%f4 601 ldd [%g1+%l0],%f34 602 603 faddd %f26,%f56,%f26 604 fmuld %f22,%f62,%f24 605 ldd [%g1+%l2],%f42 606 607 faddd %f14,%f48,%f14 608 609 fmuld %f2,%f6,%f6 610 faddd %f4,%f60,%f4 611 612 fmuld %f22,%f26,%f26 613 faddd %f24,%f60,%f24 614 615 fmuld %f12,%f14,%f14 616 617 faddd %f6,%f54,%f6 618 fmuld %f2,%f4,%f4 619 620 faddd %f26,%f54,%f26 621 fmuld %f22,%f24,%f24 622 623 faddd %f14,%f46,%f14 624 625 fmuld %f0,%f6,%f6 626 ldd [%l4+%l0],%f2 627 628 fmuld %f20,%f26,%f26 629 ldd [%l4+%l2],%f22 630 631 fmuld %f4,%f32,%f4 632 lda [%i1]%asi,%l0 ! 
preload next argument 633 634 fmuld %f24,%f40,%f24 635 lda [%i1]%asi,%f0 636 637 fmuld %f6,%f34,%f6 638 lda [%i1+4]%asi,%f1 639 640 fmuld %f26,%f42,%f26 641 add %i1,%i2,%i1 ! x += stridex 642 643 fmuld %f12,%f14,%f14 644 645 faddd %f6,%f4,%f6 646 647 faddd %f26,%f24,%f26 648 649 fmuld %f10,%f14,%f14 650 651 faddd %f6,%f2,%f6 652 653 faddd %f26,%f22,%f26 654 655 faddd %f10,%f14,%f16 656 657 faddd %f6,%f32,%f6 658 659 faddd %f26,%f40,%f26 660 andn %l0,%i5,%l0 ! hx &= ~0x80000000 661 662 fors %f16,%f19,%f16 663 addcc %i0,-1,%i0 664 665 fors %f6,%f9,%f6 666 bg,pt %icc,.loop0 667 668! delay slot 669 fors %f26,%f29,%f26 670 671 ba,pt %icc,.endloop0 672! delay slot 673 nop 674 675 .align 32 676.case3: 677 sethi %hi(0x3fc3c000),%o7 678 add %l3,8,%g1 679 fand %f8,%f44,%f2 680 fmuld %f10,%f10,%f12 681 682 sub %l0,%o7,%l0 683 fmuld %f20,%f20,%f22 684 685 fsubd %f0,%f2,%f0 686 srl %l0,10,%l0 687 mov %o0,%o3 688 689 fmuld %f12,%f52,%f14 690 mov %o1,%o4 691 692 fmuld %f22,%f52,%f24 693 mov %o2,%o5 694 695 fmuld %f0,%f0,%f2 696 andn %l0,0x1f,%l0 697 698 faddd %f14,%f50,%f14 699 700 faddd %f24,%f50,%f24 701 702 fmuld %f2,%f58,%f6 703 ldd [%l3+%l0],%f32 704 705 fmuld %f12,%f14,%f14 706 707 fmuld %f22,%f24,%f24 708 709 faddd %f6,%f56,%f6 710 fmuld %f2,%f62,%f4 711 ldd [%g1+%l0],%f34 712 713 faddd %f14,%f48,%f14 714 715 faddd %f24,%f48,%f24 716 717 fmuld %f2,%f6,%f6 718 faddd %f4,%f60,%f4 719 720 fmuld %f12,%f14,%f14 721 722 fmuld %f22,%f24,%f24 723 724 faddd %f6,%f54,%f6 725 fmuld %f2,%f4,%f4 726 727 faddd %f14,%f46,%f14 728 729 faddd %f24,%f46,%f24 730 731 fmuld %f0,%f6,%f6 732 ldd [%l4+%l0],%f2 733 734 fmuld %f4,%f32,%f4 735 lda [%i1]%asi,%l0 ! preload next argument 736 737 fmuld %f12,%f14,%f14 738 lda [%i1]%asi,%f0 739 740 fmuld %f6,%f34,%f6 741 lda [%i1+4]%asi,%f1 742 743 fmuld %f22,%f24,%f24 744 add %i1,%i2,%i1 ! 
x += stridex 745 746 fmuld %f10,%f14,%f14 747 748 faddd %f6,%f4,%f6 749 750 fmuld %f20,%f24,%f24 751 752 faddd %f10,%f14,%f16 753 754 faddd %f6,%f2,%f6 755 756 faddd %f20,%f24,%f26 757 758 fors %f16,%f19,%f16 759 andn %l0,%i5,%l0 ! hx &= ~0x80000000 760 761 faddd %f6,%f32,%f6 762 addcc %i0,-1,%i0 763 764 fors %f26,%f29,%f26 765 bg,pt %icc,.loop0 766 767! delay slot 768 fors %f6,%f9,%f6 769 770 ba,pt %icc,.endloop0 771! delay slot 772 nop 773 774 .align 32 775.case4: 776 st %f17,[%o4+4] 777 cmp %l1,%l5 778 fpadd32s %f10,%f31,%f18 779 bl,pn %icc,.case6 780 781! delay slot 782 st %f26,[%o5] 783 cmp %l2,%l5 784 fpadd32s %f20,%f31,%f28 785 bl,pn %icc,.case5 786 787! delay slot 788 st %f27,[%o5+4] 789 sethi %hi(0x3fc3c000),%o7 790 add %l3,8,%g1 791 fand %f18,%f44,%f12 792 793 sub %l1,%o7,%l1 794 sub %l2,%o7,%l2 795 fand %f28,%f44,%f22 796 fmuld %f0,%f0,%f2 797 798 fsubd %f10,%f12,%f10 799 srl %l1,10,%l1 800 mov %o1,%o4 801 802 fsubd %f20,%f22,%f20 803 srl %l2,10,%l2 804 mov %o2,%o5 805 806 fmovd %f0,%f6 807 fmuld %f2,%f52,%f4 808 mov %o0,%o3 809 810 fmuld %f10,%f10,%f12 811 andn %l1,0x1f,%l1 812 813 fmuld %f20,%f20,%f22 814 andn %l2,0x1f,%l2 815 816 faddd %f4,%f50,%f4 817 818 fmuld %f12,%f58,%f16 819 ldd [%l3+%l1],%f36 820 821 fmuld %f22,%f58,%f26 822 ldd [%l3+%l2],%f40 823 824 fmuld %f2,%f4,%f4 825 826 faddd %f16,%f56,%f16 827 fmuld %f12,%f62,%f14 828 ldd [%g1+%l1],%f38 829 830 faddd %f26,%f56,%f26 831 fmuld %f22,%f62,%f24 832 ldd [%g1+%l2],%f42 833 834 faddd %f4,%f48,%f4 835 836 fmuld %f12,%f16,%f16 837 faddd %f14,%f60,%f14 838 839 fmuld %f22,%f26,%f26 840 faddd %f24,%f60,%f24 841 842 fmuld %f2,%f4,%f4 843 844 faddd %f16,%f54,%f16 845 fmuld %f12,%f14,%f14 846 847 faddd %f26,%f54,%f26 848 fmuld %f22,%f24,%f24 849 850 faddd %f4,%f46,%f4 851 852 fmuld %f10,%f16,%f16 853 ldd [%l4+%l1],%f12 854 855 fmuld %f20,%f26,%f26 856 ldd [%l4+%l2],%f22 857 858 fmuld %f14,%f36,%f14 859 lda [%i1]%asi,%l0 ! 
preload next argument 860 861 fmuld %f24,%f40,%f24 862 lda [%i1]%asi,%f0 863 864 fmuld %f16,%f38,%f16 865 lda [%i1+4]%asi,%f1 866 867 fmuld %f26,%f42,%f26 868 add %i1,%i2,%i1 ! x += stridex 869 870 fmuld %f2,%f4,%f4 871 872 faddd %f16,%f14,%f16 873 874 faddd %f26,%f24,%f26 875 876 fmuld %f6,%f4,%f4 877 878 faddd %f16,%f12,%f16 879 880 faddd %f26,%f22,%f26 881 882 faddd %f6,%f4,%f6 883 884 faddd %f16,%f36,%f16 885 886 faddd %f26,%f40,%f26 887 andn %l0,%i5,%l0 ! hx &= ~0x80000000 888 889 fors %f6,%f9,%f6 890 addcc %i0,-1,%i0 891 892 fors %f16,%f19,%f16 893 bg,pt %icc,.loop0 894 895! delay slot 896 fors %f26,%f29,%f26 897 898 ba,pt %icc,.endloop0 899! delay slot 900 nop 901 902 .align 32 903.case5: 904 sethi %hi(0x3fc3c000),%o7 905 add %l3,8,%g1 906 fand %f18,%f44,%f12 907 fmuld %f0,%f0,%f2 908 909 sub %l1,%o7,%l1 910 fmuld %f20,%f20,%f22 911 912 fsubd %f10,%f12,%f10 913 srl %l1,10,%l1 914 mov %o1,%o4 915 916 fmovd %f0,%f6 917 fmuld %f2,%f52,%f4 918 mov %o0,%o3 919 920 fmuld %f22,%f52,%f24 921 mov %o2,%o5 922 923 fmuld %f10,%f10,%f12 924 andn %l1,0x1f,%l1 925 926 faddd %f4,%f50,%f4 927 928 faddd %f24,%f50,%f24 929 930 fmuld %f12,%f58,%f16 931 ldd [%l3+%l1],%f36 932 933 fmuld %f2,%f4,%f4 934 935 fmuld %f22,%f24,%f24 936 937 faddd %f16,%f56,%f16 938 fmuld %f12,%f62,%f14 939 ldd [%g1+%l1],%f38 940 941 faddd %f4,%f48,%f4 942 943 faddd %f24,%f48,%f24 944 945 fmuld %f12,%f16,%f16 946 faddd %f14,%f60,%f14 947 948 fmuld %f2,%f4,%f4 949 950 fmuld %f22,%f24,%f24 951 952 faddd %f16,%f54,%f16 953 fmuld %f12,%f14,%f14 954 955 faddd %f4,%f46,%f4 956 957 faddd %f24,%f46,%f24 958 959 fmuld %f10,%f16,%f16 960 ldd [%l4+%l1],%f12 961 962 fmuld %f14,%f36,%f14 963 lda [%i1]%asi,%l0 ! preload next argument 964 965 fmuld %f2,%f4,%f4 966 lda [%i1]%asi,%f0 967 968 fmuld %f16,%f38,%f16 969 lda [%i1+4]%asi,%f1 970 971 fmuld %f22,%f24,%f24 972 add %i1,%i2,%i1 ! 
x += stridex 973 974 fmuld %f6,%f4,%f4 975 976 faddd %f16,%f14,%f16 977 978 fmuld %f20,%f24,%f24 979 980 faddd %f6,%f4,%f6 981 982 faddd %f16,%f12,%f16 983 984 faddd %f20,%f24,%f26 985 986 fors %f6,%f9,%f6 987 andn %l0,%i5,%l0 ! hx &= ~0x80000000 988 989 faddd %f16,%f36,%f16 990 addcc %i0,-1,%i0 991 992 fors %f26,%f29,%f26 993 bg,pt %icc,.loop0 994 995! delay slot 996 fors %f16,%f19,%f16 997 998 ba,pt %icc,.endloop0 999! delay slot 1000 nop 1001 1002 .align 32 1003.case6: 1004 st %f27,[%o5+4] 1005 cmp %l2,%l5 1006 fpadd32s %f20,%f31,%f28 1007 bl,pn %icc,.case7 1008 1009! delay slot 1010 sethi %hi(0x3fc3c000),%o7 1011 add %l3,8,%g1 1012 fand %f28,%f44,%f22 1013 fmuld %f0,%f0,%f2 1014 1015 sub %l2,%o7,%l2 1016 fmuld %f10,%f10,%f12 1017 1018 fsubd %f20,%f22,%f20 1019 srl %l2,10,%l2 1020 mov %o2,%o5 1021 1022 fmovd %f0,%f6 1023 fmuld %f2,%f52,%f4 1024 mov %o0,%o3 1025 1026 fmuld %f12,%f52,%f14 1027 mov %o1,%o4 1028 1029 fmuld %f20,%f20,%f22 1030 andn %l2,0x1f,%l2 1031 1032 faddd %f4,%f50,%f4 1033 1034 faddd %f14,%f50,%f14 1035 1036 fmuld %f22,%f58,%f26 1037 ldd [%l3+%l2],%f40 1038 1039 fmuld %f2,%f4,%f4 1040 1041 fmuld %f12,%f14,%f14 1042 1043 faddd %f26,%f56,%f26 1044 fmuld %f22,%f62,%f24 1045 ldd [%g1+%l2],%f42 1046 1047 faddd %f4,%f48,%f4 1048 1049 faddd %f14,%f48,%f14 1050 1051 fmuld %f22,%f26,%f26 1052 faddd %f24,%f60,%f24 1053 1054 fmuld %f2,%f4,%f4 1055 1056 fmuld %f12,%f14,%f14 1057 1058 faddd %f26,%f54,%f26 1059 fmuld %f22,%f24,%f24 1060 1061 faddd %f4,%f46,%f4 1062 1063 faddd %f14,%f46,%f14 1064 1065 fmuld %f20,%f26,%f26 1066 ldd [%l4+%l2],%f22 1067 1068 fmuld %f24,%f40,%f24 1069 lda [%i1]%asi,%l0 ! preload next argument 1070 1071 fmuld %f2,%f4,%f4 1072 lda [%i1]%asi,%f0 1073 1074 fmuld %f26,%f42,%f26 1075 lda [%i1+4]%asi,%f1 1076 1077 fmuld %f12,%f14,%f14 1078 add %i1,%i2,%i1 ! 
x += stridex 1079 1080 fmuld %f6,%f4,%f4 1081 1082 faddd %f26,%f24,%f26 1083 1084 fmuld %f10,%f14,%f14 1085 1086 faddd %f6,%f4,%f6 1087 1088 faddd %f26,%f22,%f26 1089 1090 faddd %f10,%f14,%f16 1091 1092 fors %f6,%f9,%f6 1093 andn %l0,%i5,%l0 ! hx &= ~0x80000000 1094 1095 faddd %f26,%f40,%f26 1096 addcc %i0,-1,%i0 1097 1098 fors %f16,%f19,%f16 1099 bg,pt %icc,.loop0 1100 1101! delay slot 1102 fors %f26,%f29,%f26 1103 1104 ba,pt %icc,.endloop0 1105! delay slot 1106 nop 1107 1108 .align 32 1109.case7: 1110 fmuld %f0,%f0,%f2 1111 fmovd %f0,%f6 1112 mov %o0,%o3 1113 1114 fmuld %f10,%f10,%f12 1115 mov %o1,%o4 1116 1117 fmuld %f20,%f20,%f22 1118 mov %o2,%o5 1119 1120 fmuld %f2,%f52,%f4 1121 lda [%i1]%asi,%l0 ! preload next argument 1122 1123 fmuld %f12,%f52,%f14 1124 lda [%i1]%asi,%f0 1125 1126 fmuld %f22,%f52,%f24 1127 lda [%i1+4]%asi,%f1 1128 1129 faddd %f4,%f50,%f4 1130 add %i1,%i2,%i1 ! x += stridex 1131 1132 faddd %f14,%f50,%f14 1133 1134 faddd %f24,%f50,%f24 1135 1136 fmuld %f2,%f4,%f4 1137 1138 fmuld %f12,%f14,%f14 1139 1140 fmuld %f22,%f24,%f24 1141 1142 faddd %f4,%f48,%f4 1143 1144 faddd %f14,%f48,%f14 1145 1146 faddd %f24,%f48,%f24 1147 1148 fmuld %f2,%f4,%f4 1149 1150 fmuld %f12,%f14,%f14 1151 1152 fmuld %f22,%f24,%f24 1153 1154 faddd %f4,%f46,%f4 1155 1156 faddd %f14,%f46,%f14 1157 1158 faddd %f24,%f46,%f24 1159 1160 fmuld %f2,%f4,%f4 1161 1162 fmuld %f12,%f14,%f14 1163 1164 fmuld %f22,%f24,%f24 1165 1166 fmuld %f6,%f4,%f4 1167 1168 fmuld %f10,%f14,%f14 1169 1170 fmuld %f20,%f24,%f24 1171 1172 faddd %f6,%f4,%f6 1173 1174 faddd %f10,%f14,%f16 1175 1176 faddd %f20,%f24,%f26 1177 andn %l0,%i5,%l0 ! hx &= ~0x80000000 1178 1179 fors %f6,%f9,%f6 1180 addcc %i0,-1,%i0 1181 1182 fors %f16,%f19,%f16 1183 bg,pt %icc,.loop0 1184 1185! delay slot 1186 fors %f26,%f29,%f26 1187 1188 ba,pt %icc,.endloop0 1189! delay slot 1190 nop 1191 1192 1193 .align 32 1194.endloop2: 1195 cmp %l1,%l5 1196 bl,pn %icc,1f 1197! 
delay slot 1198 fabsd %f10,%f10 1199 sethi %hi(0x3fc3c000),%o7 1200 fpadd32s %f10,%f31,%f18 1201 add %l3,8,%g1 1202 fand %f18,%f44,%f12 1203 sub %l1,%o7,%l1 1204 fsubd %f10,%f12,%f10 1205 srl %l1,10,%l1 1206 fmuld %f10,%f10,%f12 1207 andn %l1,0x1f,%l1 1208 fmuld %f12,%f58,%f20 1209 ldd [%l3+%l1],%f36 1210 faddd %f20,%f56,%f20 1211 fmuld %f12,%f62,%f14 1212 ldd [%g1+%l1],%f38 1213 fmuld %f12,%f20,%f20 1214 faddd %f14,%f60,%f14 1215 faddd %f20,%f54,%f20 1216 fmuld %f12,%f14,%f14 1217 fmuld %f10,%f20,%f20 1218 ldd [%l4+%l1],%f12 1219 fmuld %f14,%f36,%f14 1220 fmuld %f20,%f38,%f20 1221 faddd %f20,%f14,%f20 1222 faddd %f20,%f12,%f20 1223 ba,pt %icc,2f 1224! delay slot 1225 faddd %f20,%f36,%f20 12261: 1227 fmuld %f10,%f10,%f12 1228 fmuld %f12,%f52,%f14 1229 faddd %f14,%f50,%f14 1230 fmuld %f12,%f14,%f14 1231 faddd %f14,%f48,%f14 1232 fmuld %f12,%f14,%f14 1233 faddd %f14,%f46,%f14 1234 fmuld %f12,%f14,%f14 1235 fmuld %f10,%f14,%f14 1236 faddd %f10,%f14,%f20 12372: 1238 fors %f20,%f19,%f20 1239 st %f20,[%o1] 1240 st %f21,[%o1+4] 1241 1242.endloop1: 1243 cmp %l0,%l5 1244 bl,pn %icc,1f 1245! delay slot 1246 fabsd %f0,%f0 1247 sethi %hi(0x3fc3c000),%o7 1248 fpadd32s %f0,%f31,%f8 1249 add %l3,8,%g1 1250 fand %f8,%f44,%f2 1251 sub %l0,%o7,%l0 1252 fsubd %f0,%f2,%f0 1253 srl %l0,10,%l0 1254 fmuld %f0,%f0,%f2 1255 andn %l0,0x1f,%l0 1256 fmuld %f2,%f58,%f20 1257 ldd [%l3+%l0],%f32 1258 faddd %f20,%f56,%f20 1259 fmuld %f2,%f62,%f4 1260 ldd [%g1+%l0],%f34 1261 fmuld %f2,%f20,%f20 1262 faddd %f4,%f60,%f4 1263 faddd %f20,%f54,%f20 1264 fmuld %f2,%f4,%f4 1265 fmuld %f0,%f20,%f20 1266 ldd [%l4+%l0],%f2 1267 fmuld %f4,%f32,%f4 1268 fmuld %f20,%f34,%f20 1269 faddd %f20,%f4,%f20 1270 faddd %f20,%f2,%f20 1271 ba,pt %icc,2f 1272! 
delay slot 1273 faddd %f20,%f32,%f20 12741: 1275 fmuld %f0,%f0,%f2 1276 fmuld %f2,%f52,%f4 1277 faddd %f4,%f50,%f4 1278 fmuld %f2,%f4,%f4 1279 faddd %f4,%f48,%f4 1280 fmuld %f2,%f4,%f4 1281 faddd %f4,%f46,%f4 1282 fmuld %f2,%f4,%f4 1283 fmuld %f0,%f4,%f4 1284 faddd %f0,%f4,%f20 12852: 1286 fors %f20,%f9,%f20 1287 st %f20,[%o0] 1288 st %f21,[%o0+4] 1289 1290.endloop0: 1291 st %f6,[%o3] 1292 st %f7,[%o3+4] 1293 st %f16,[%o4] 1294 st %f17,[%o4+4] 1295 st %f26,[%o5] 1296 st %f27,[%o5+4] 1297 1298! return. finished off with only primary range arguments. 1299 1300 ret 1301 restore 1302 1303 1304 .align 32 1305.range0: 1306 cmp %l0,LIM_l6 1307 bg,a,pt %icc,.MEDIUM ! branch if x is not tiny 1308! delay slot, annulled if branch not taken 1309 mov 0x1,LIM_l6 ! set "processing loop0" 1310 st %f0,[%o0] ! *y = *x with inexact if x nonzero 1311 st %f1,[%o0+4] 1312 fdtoi %f0,%f2 1313 addcc %i0,-1,%i0 1314 ble,pn %icc,.endloop0 1315! delay slot, harmless if branch taken 1316 add %i3,%i4,%i3 ! y += stridey 1317 andn %l1,%i5,%l0 ! hx &= ~0x80000000 1318 fmovd %f10,%f0 1319 ba,pt %icc,.loop0 1320! delay slot 1321 add %i1,%i2,%i1 ! x += stridex 1322 1323 1324 .align 32 1325.range1: 1326 cmp %l1,LIM_l6 1327 bg,a,pt %icc,.MEDIUM ! branch if x is not tiny 1328! delay slot, annulled if branch not taken 1329 mov 0x2,LIM_l6 ! set "processing loop1" 1330 st %f10,[%o1] ! *y = *x with inexact if x nonzero 1331 st %f11,[%o1+4] 1332 fdtoi %f10,%f12 1333 addcc %i0,-1,%i0 1334 ble,pn %icc,.endloop1 1335! delay slot, harmless if branch taken 1336 add %i3,%i4,%i3 ! y += stridey 1337 andn %l2,%i5,%l1 ! hx &= ~0x80000000 1338 fmovd %f20,%f10 1339 ba,pt %icc,.loop1 1340! delay slot 1341 add %i1,%i2,%i1 ! x += stridex 1342 1343 1344 .align 32 1345.range2: 1346 cmp %l2,LIM_l6 1347 bg,a,pt %icc,.MEDIUM ! branch if x is not tiny 1348! delay slot, annulled if branch not taken 1349 mov 0x3,LIM_l6 ! set "processing loop2" 1350 st %f20,[%o2] ! 
*y = *x with inexact if x nonzero 1351 st %f21,[%o2+4] 1352 fdtoi %f20,%f22 13531: 1354 addcc %i0,-1,%i0 1355 ble,pn %icc,.endloop2 1356! delay slot 1357 nop 1358 ld [%i1],%l2 1359 ld [%i1],%f20 1360 ld [%i1+4],%f21 1361 andn %l2,%i5,%l2 ! hx &= ~0x80000000 1362 ba,pt %icc,.loop2 1363! delay slot 1364 add %i1,%i2,%i1 ! x += stridex 1365 1366 1367 .align 32 1368.MEDIUM: 1369 1370! ========== medium range ========== 1371 1372! register use 1373 1374! i0 n 1375! i1 x 1376! i2 stridex 1377! i3 y 1378! i4 stridey 1379! i5 0x80000000 1380 1381! l0 hx0 1382! l1 hx1 1383! l2 hx2 1384! l3 __vlibm_TBL_sincos_hi 1385! l4 __vlibm_TBL_sincos_lo 1386! l5 constants 1387! l6 in transition from pri-range and here, use for biguns 1388! l7 0x413921fb 1389 1390! the following are 64-bit registers in both V8+ and V9 1391 1392! g1 scratch 1393! g5 1394 1395! o0 py0 1396! o1 py1 1397! o2 py2 1398! o3 n0 1399! o4 n1 1400! o5 n2 1401! o7 scratch 1402 1403! f0 x0 1404! f2 n0,y0 1405! f4 1406! f6 1407! f8 scratch for table base 1408! f9 signbit0 1409! f10 x1 1410! f12 n1,y1 1411! f14 1412! f16 1413! f18 scratch for table base 1414! f19 signbit1 1415! f20 x2 1416! f22 n2,y2 1417! f24 1418! f26 1419! f28 scratch for table base 1420! f29 signbit2 1421! f30 0x80000000 1422! f31 0x4000 1423! f32 1424! f34 1425! f36 1426! f38 1427! f40 invpio2 1428! f42 round 1429! f44 0xffff800000000000 1430! f46 pio2_1 1431! f48 pio2_2 1432! f50 pio2_3 1433! f52 pio2_3t 1434! f54 one 1435! f56 pp1 1436! f58 pp2 1437! f60 qq1 1438! f62 qq2 1439 1440 PIC_SET(g5,constants,l5) 1441 1442 ! %o3,%o4,%o5 need to be stored 1443 st %f6,[%o3] 1444 sethi %hi(0x413921fb),%l7 1445 st %f7,[%o3+4] 1446 or %l7,%lo(0x413921fb),%l7 1447 st %f16,[%o4] 1448 st %f17,[%o4+4] 1449 st %f26,[%o5] 1450 st %f27,[%o5+4] 1451 ldd [%l5+invpio2],%f40 1452 ldd [%l5+round],%f42 1453 ldd [%l5+pio2_1],%f46 1454 ldd [%l5+pio2_2],%f48 1455 ldd [%l5+pio2_3],%f50 1456 ldd [%l5+pio2_3t],%f52 1457 std %f54,[%fp+x0_1+8] ! 
set up stack data 1458 std %f54,[%fp+x1_1+8] 1459 std %f54,[%fp+x2_1+8] 1460 stx %g0,[%fp+y0_0+8] 1461 stx %g0,[%fp+y1_0+8] 1462 stx %g0,[%fp+y2_0+8] 1463 1464! branched here in the middle of the array. Need to adjust 1465! for the members of the triple that were selected in the primary 1466! loop. 1467 1468! no adjustment since all three selected here 1469 subcc LIM_l6,0x1,%g0 ! continue in LOOP0? 1470 bz,a %icc,.LOOP0 1471 mov 0x0,LIM_l6 ! delay slot set biguns=0 1472 1473! ajust 1st triple since 2d and 3d done here 1474 subcc LIM_l6,0x2,%g0 ! continue in LOOP1? 1475 fors %f0,%f9,%f0 ! restore sign bit 1476 fmuld %f0,%f40,%f2 ! adj LOOP0 1477 bz,a %icc,.LOOP1 1478 mov 0x0,LIM_l6 ! delay slot set biguns=0 1479 1480! ajust 1st and 2d triple since 3d done here 1481 subcc LIM_l6,0x3,%g0 ! continue in LOOP2? 1482 !done fmuld %f0,%f40,%f2 ! adj LOOP0 1483 sub %i3,%i4,%i3 ! adjust to not double increment 1484 fors %f10,%f19,%f10 ! restore sign bit 1485 fmuld %f10,%f40,%f12 ! adj LOOP1 1486 faddd %f2,%f42,%f2 ! adj LOOP1 1487 bz,a %icc,.LOOP2 1488 mov 0x0,LIM_l6 ! delay slot set biguns=0 1489 1490 .align 32 1491.LOOP0: 1492 lda [%i1]%asi,%l1 ! preload next argument 1493 mov %i3,%o0 ! py0 = y 1494 lda [%i1]%asi,%f10 1495 cmp %l0,%l7 1496 add %i3,%i4,%i3 ! y += stridey 1497 bg,pn %icc,.BIG0 ! if hx > 0x413921fb 1498 1499! delay slot 1500 lda [%i1+4]%asi,%f11 1501 addcc %i0,-1,%i0 1502 add %i1,%i2,%i1 ! x += stridex 1503 ble,pn %icc,.ENDLOOP1 1504 1505! delay slot 1506 andn %l1,%i5,%l1 1507 nop 1508 fmuld %f0,%f40,%f2 1509 fabsd %f54,%f54 ! a nop for alignment only 1510 1511.LOOP1: 1512 lda [%i1]%asi,%l2 ! preload next argument 1513 mov %i3,%o1 ! py1 = y 1514 1515 lda [%i1]%asi,%f20 1516 cmp %l1,%l7 1517 add %i3,%i4,%i3 ! y += stridey 1518 bg,pn %icc,.BIG1 ! if hx > 0x413921fb 1519 1520! delay slot 1521 lda [%i1+4]%asi,%f21 1522 addcc %i0,-1,%i0 1523 add %i1,%i2,%i1 ! x += stridex 1524 ble,pn %icc,.ENDLOOP2 1525 1526! 
delay slot 1527 andn %l2,%i5,%l2 1528 nop 1529 fmuld %f10,%f40,%f12 1530 faddd %f2,%f42,%f2 1531 1532.LOOP2: 1533 st %f3,[%fp+n0] 1534 mov %i3,%o2 ! py2 = y 1535 1536 cmp %l2,%l7 1537 add %i3,%i4,%i3 ! y += stridey 1538 fmuld %f20,%f40,%f22 1539 bg,pn %icc,.BIG2 ! if hx > 0x413921fb 1540 1541! delay slot 1542 add %l5,thresh+4,%o7 1543 faddd %f12,%f42,%f12 1544 st %f13,[%fp+n1] 1545 1546! - 1547 1548 add %l5,thresh,%g1 1549 faddd %f22,%f42,%f22 1550 st %f23,[%fp+n2] 1551 1552 fsubd %f2,%f42,%f2 ! n 1553 1554 fsubd %f12,%f42,%f12 ! n 1555 1556 fsubd %f22,%f42,%f22 ! n 1557 1558 fmuld %f2,%f46,%f4 1559 1560 fmuld %f12,%f46,%f14 1561 1562 fmuld %f22,%f46,%f24 1563 1564 fsubd %f0,%f4,%f4 1565 fmuld %f2,%f48,%f6 1566 1567 fsubd %f10,%f14,%f14 1568 fmuld %f12,%f48,%f16 1569 1570 fsubd %f20,%f24,%f24 1571 fmuld %f22,%f48,%f26 1572 1573 fsubd %f4,%f6,%f0 1574 ld [%fp+n0],%o3 1575 1576 fsubd %f14,%f16,%f10 1577 ld [%fp+n1],%o4 1578 1579 fsubd %f24,%f26,%f20 1580 ld [%fp+n2],%o5 1581 1582 fsubd %f4,%f0,%f32 1583 and %o3,1,%o3 1584 1585 fsubd %f14,%f10,%f34 1586 and %o4,1,%o4 1587 1588 fsubd %f24,%f20,%f36 1589 and %o5,1,%o5 1590 1591 fsubd %f32,%f6,%f32 1592 fmuld %f2,%f50,%f8 1593 sll %o3,3,%o3 1594 1595 fsubd %f34,%f16,%f34 1596 fmuld %f12,%f50,%f18 1597 sll %o4,3,%o4 1598 1599 fsubd %f36,%f26,%f36 1600 fmuld %f22,%f50,%f28 1601 sll %o5,3,%o5 1602 1603 fsubd %f8,%f32,%f8 1604 ld [%g1+%o3],%f6 1605 1606 fsubd %f18,%f34,%f18 1607 ld [%g1+%o4],%f16 1608 1609 fsubd %f28,%f36,%f28 1610 ld [%g1+%o5],%f26 1611 1612 fsubd %f0,%f8,%f4 1613 1614 fsubd %f10,%f18,%f14 1615 1616 fsubd %f20,%f28,%f24 1617 1618 fsubd %f0,%f4,%f32 1619 1620 fsubd %f10,%f14,%f34 1621 1622 fsubd %f20,%f24,%f36 1623 1624 fsubd %f32,%f8,%f32 1625 fmuld %f2,%f52,%f2 1626 1627 fsubd %f34,%f18,%f34 1628 fmuld %f12,%f52,%f12 1629 1630 fsubd %f36,%f28,%f36 1631 fmuld %f22,%f52,%f22 1632 1633 fsubd %f2,%f32,%f2 1634 ld [%o7+%o3],%f8 1635 1636 fsubd %f12,%f34,%f12 1637 ld [%o7+%o4],%f18 1638 1639 fsubd %f22,%f36,%f22 
1640 ld [%o7+%o5],%f28 1641 1642 fsubd %f4,%f2,%f0 ! x 1643 1644 fsubd %f14,%f12,%f10 ! x 1645 1646 fsubd %f24,%f22,%f20 ! x 1647 1648 fsubd %f4,%f0,%f4 1649 1650 fsubd %f14,%f10,%f14 1651 1652 fsubd %f24,%f20,%f24 1653 1654 fands %f0,%f30,%f9 ! save signbit 1655 1656 fands %f10,%f30,%f19 ! save signbit 1657 1658 fands %f20,%f30,%f29 ! save signbit 1659 1660 fabsd %f0,%f0 1661 std %f0,[%fp+x0_1] 1662 1663 fabsd %f10,%f10 1664 std %f10,[%fp+x1_1] 1665 1666 fabsd %f20,%f20 1667 std %f20,[%fp+x2_1] 1668 1669 fsubd %f4,%f2,%f2 ! y 1670 1671 fsubd %f14,%f12,%f12 ! y 1672 1673 fsubd %f24,%f22,%f22 ! y 1674 1675 fcmpgt32 %f6,%f0,%l0 1676 1677 fcmpgt32 %f16,%f10,%l1 1678 1679 fcmpgt32 %f26,%f20,%l2 1680 1681! -- 16 byte aligned 1682 fxors %f2,%f9,%f2 1683 1684 fxors %f12,%f19,%f12 1685 1686 fxors %f22,%f29,%f22 1687 1688 fands %f9,%f8,%f9 ! if (n & 1) clear sign bit 1689 andcc %l0,2,%g0 1690 bne,pn %icc,.CASE4 1691 1692! delay slot 1693 fands %f19,%f18,%f19 ! if (n & 1) clear sign bit 1694 andcc %l1,2,%g0 1695 bne,pn %icc,.CASE2 1696 1697! delay slot 1698 fands %f29,%f28,%f29 ! if (n & 1) clear sign bit 1699 andcc %l2,2,%g0 1700 bne,pn %icc,.CASE1 1701 1702! 
delay slot 1703 fpadd32s %f0,%f31,%f8 1704 sethi %hi(0x3fc3c000),%o7 1705 ld [%fp+x0_1],%l0 1706 1707 fpadd32s %f10,%f31,%f18 1708 add %l3,8,%g1 1709 ld [%fp+x1_1],%l1 1710 1711 fpadd32s %f20,%f31,%f28 1712 ld [%fp+x2_1],%l2 1713 1714 fand %f8,%f44,%f4 1715 sub %l0,%o7,%l0 1716 1717 fand %f18,%f44,%f14 1718 sub %l1,%o7,%l1 1719 1720 fand %f28,%f44,%f24 1721 sub %l2,%o7,%l2 1722 1723 fsubd %f0,%f4,%f0 1724 srl %l0,10,%l0 1725 1726 fsubd %f10,%f14,%f10 1727 srl %l1,10,%l1 1728 1729 fsubd %f20,%f24,%f20 1730 srl %l2,10,%l2 1731 1732 faddd %f0,%f2,%f0 1733 andn %l0,0x1f,%l0 1734 1735 faddd %f10,%f12,%f10 1736 andn %l1,0x1f,%l1 1737 1738 faddd %f20,%f22,%f20 1739 andn %l2,0x1f,%l2 1740 1741 fmuld %f0,%f0,%f2 1742 add %l0,%o3,%l0 1743 1744 fmuld %f10,%f10,%f12 1745 add %l1,%o4,%l1 1746 1747 fmuld %f20,%f20,%f22 1748 add %l2,%o5,%l2 1749 1750 fmuld %f2,%f58,%f6 1751 ldd [%l3+%l0],%f32 1752 1753 fmuld %f12,%f58,%f16 1754 ldd [%l3+%l1],%f34 1755 1756 fmuld %f22,%f58,%f26 1757 ldd [%l3+%l2],%f36 1758 1759 faddd %f6,%f56,%f6 1760 fmuld %f2,%f62,%f4 1761 1762 faddd %f16,%f56,%f16 1763 fmuld %f12,%f62,%f14 1764 1765 faddd %f26,%f56,%f26 1766 fmuld %f22,%f62,%f24 1767 1768 fmuld %f2,%f6,%f6 1769 faddd %f4,%f60,%f4 1770 1771 fmuld %f12,%f16,%f16 1772 faddd %f14,%f60,%f14 1773 1774 fmuld %f22,%f26,%f26 1775 faddd %f24,%f60,%f24 1776 1777 faddd %f6,%f54,%f6 1778 fmuld %f2,%f4,%f4 1779 1780 faddd %f16,%f54,%f16 1781 fmuld %f12,%f14,%f14 1782 1783 faddd %f26,%f54,%f26 1784 fmuld %f22,%f24,%f24 1785 1786 fmuld %f0,%f6,%f6 1787 ldd [%g1+%l0],%f2 1788 1789 fmuld %f10,%f16,%f16 1790 ldd [%g1+%l1],%f12 1791 1792 fmuld %f20,%f26,%f26 1793 ldd [%g1+%l2],%f22 1794 1795 fmuld %f4,%f32,%f4 1796 ldd [%l4+%l0],%f0 1797 1798 fmuld %f14,%f34,%f14 1799 ldd [%l4+%l1],%f10 1800 1801 fmuld %f24,%f36,%f24 1802 ldd [%l4+%l2],%f20 1803 1804 fmuld %f6,%f2,%f6 1805 1806 fmuld %f16,%f12,%f16 1807 1808 fmuld %f26,%f22,%f26 1809 1810 faddd %f6,%f4,%f6 1811 1812 faddd %f16,%f14,%f16 1813 1814 faddd 
%f26,%f24,%f26 1815 1816 faddd %f6,%f0,%f6 1817 1818 faddd %f16,%f10,%f16 1819 1820 faddd %f26,%f20,%f26 1821 1822 faddd %f6,%f32,%f6 1823 1824 faddd %f16,%f34,%f16 1825 1826 faddd %f26,%f36,%f26 1827 1828.FIXSIGN: 1829 ld [%fp+n0],%o3 1830 add %l5,thresh-4,%g1 1831 1832 ld [%fp+n1],%o4 1833 1834 ld [%fp+n2],%o5 1835 and %o3,2,%o3 1836 1837 sll %o3,2,%o3 1838 and %o4,2,%o4 1839 lda [%i1]%asi,%l0 ! preload next argument 1840 1841 sll %o4,2,%o4 1842 and %o5,2,%o5 1843 ld [%g1+%o3],%f8 1844 1845 sll %o5,2,%o5 1846 ld [%g1+%o4],%f18 1847 1848 ld [%g1+%o5],%f28 1849 fxors %f9,%f8,%f9 1850 1851 lda [%i1]%asi,%f0 1852 fxors %f29,%f28,%f29 1853 1854 lda [%i1+4]%asi,%f1 1855 fxors %f19,%f18,%f19 1856 1857 fors %f6,%f9,%f6 ! tack on sign 1858 add %i1,%i2,%i1 ! x += stridex 1859 st %f6,[%o0] 1860 1861 fors %f26,%f29,%f26 ! tack on sign 1862 st %f7,[%o0+4] 1863 1864 fors %f16,%f19,%f16 ! tack on sign 1865 st %f26,[%o2] 1866 1867 st %f27,[%o2+4] 1868 addcc %i0,-1,%i0 1869 1870 st %f16,[%o1] 1871 andn %l0,%i5,%l0 ! hx &= ~0x80000000 1872 bg,pt %icc,.LOOP0 1873 1874! delay slot 1875 st %f17,[%o1+4] 1876 1877 ba,pt %icc,.ENDLOOP0 1878! 
delay slot 1879 nop 1880 1881 .align 32 1882.CASE1: 1883 fpadd32s %f10,%f31,%f18 1884 sethi %hi(0x3fc3c000),%o7 1885 ld [%fp+x0_1],%l0 1886 1887 fand %f8,%f44,%f4 1888 add %l3,8,%g1 1889 ld [%fp+x1_1],%l1 1890 1891 fand %f18,%f44,%f14 1892 sub %l0,%o7,%l0 1893 1894 fsubd %f0,%f4,%f0 1895 srl %l0,10,%l0 1896 sub %l1,%o7,%l1 1897 1898 fsubd %f10,%f14,%f10 1899 srl %l1,10,%l1 1900 1901 fmuld %f20,%f20,%f20 1902 ldd [%l5+%o5],%f36 1903 add %l5,%o5,%l2 1904 1905 faddd %f0,%f2,%f0 1906 andn %l0,0x1f,%l0 1907 1908 faddd %f10,%f12,%f10 1909 andn %l1,0x1f,%l1 1910 1911 fmuld %f20,%f36,%f24 1912 ldd [%l2+0x10],%f26 1913 add %fp,%o5,%o5 1914 1915 fmuld %f0,%f0,%f2 1916 add %l0,%o3,%l0 1917 1918 fmuld %f10,%f10,%f12 1919 add %l1,%o4,%l1 1920 1921 faddd %f24,%f26,%f24 1922 ldd [%l2+0x20],%f36 1923 1924 fmuld %f2,%f58,%f6 1925 ldd [%l3+%l0],%f32 1926 1927 fmuld %f12,%f58,%f16 1928 ldd [%l3+%l1],%f34 1929 1930 fmuld %f20,%f24,%f24 1931 ldd [%l2+0x30],%f26 1932 1933 faddd %f6,%f56,%f6 1934 fmuld %f2,%f62,%f4 1935 1936 faddd %f16,%f56,%f16 1937 fmuld %f12,%f62,%f14 1938 1939 faddd %f24,%f36,%f24 1940 ldd [%o5+x2_1],%f36 1941 1942 fmuld %f2,%f6,%f6 1943 faddd %f4,%f60,%f4 1944 1945 fmuld %f12,%f16,%f16 1946 faddd %f14,%f60,%f14 1947 1948 fmuld %f20,%f24,%f24 1949 1950 faddd %f6,%f54,%f6 1951 fmuld %f2,%f4,%f4 1952 ldd [%g1+%l0],%f2 1953 1954 faddd %f16,%f54,%f16 1955 fmuld %f12,%f14,%f14 1956 ldd [%g1+%l1],%f12 1957 1958 faddd %f24,%f26,%f24 1959 1960 fmuld %f0,%f6,%f6 1961 ldd [%l4+%l0],%f0 1962 1963 fmuld %f10,%f16,%f16 1964 ldd [%l4+%l1],%f10 1965 1966 fmuld %f4,%f32,%f4 1967 std %f22,[%fp+y2_0] 1968 1969 fmuld %f14,%f34,%f14 1970 1971 fmuld %f6,%f2,%f6 1972 1973 fmuld %f16,%f12,%f16 1974 1975 fmuld %f20,%f24,%f24 1976 1977 faddd %f6,%f4,%f6 1978 1979 faddd %f16,%f14,%f16 1980 1981 fmuld %f36,%f24,%f24 1982 ldd [%o5+y2_0],%f22 1983 1984 faddd %f6,%f0,%f6 1985 1986 faddd %f16,%f10,%f16 1987 1988 faddd %f24,%f22,%f24 1989 1990 faddd %f6,%f32,%f6 1991 1992 faddd %f16,%f34,%f16 1993 
ba,pt %icc,.FIXSIGN 1994 1995! delay slot 1996 faddd %f36,%f24,%f26 1997 1998 .align 32 1999.CASE2: 2000 fpadd32s %f0,%f31,%f8 2001 ld [%fp+x0_1],%l0 2002 andcc %l2,2,%g0 2003 bne,pn %icc,.CASE3 2004 2005! delay slot 2006 sethi %hi(0x3fc3c000),%o7 2007 fpadd32s %f20,%f31,%f28 2008 ld [%fp+x2_1],%l2 2009 2010 fand %f8,%f44,%f4 2011 sub %l0,%o7,%l0 2012 add %l3,8,%g1 2013 2014 fand %f28,%f44,%f24 2015 sub %l2,%o7,%l2 2016 2017 fsubd %f0,%f4,%f0 2018 srl %l0,10,%l0 2019 2020 fsubd %f20,%f24,%f20 2021 srl %l2,10,%l2 2022 2023 fmuld %f10,%f10,%f10 2024 ldd [%l5+%o4],%f34 2025 add %l5,%o4,%l1 2026 2027 faddd %f0,%f2,%f0 2028 andn %l0,0x1f,%l0 2029 2030 faddd %f20,%f22,%f20 2031 andn %l2,0x1f,%l2 2032 2033 fmuld %f10,%f34,%f14 2034 ldd [%l1+0x10],%f16 2035 add %fp,%o4,%o4 2036 2037 fmuld %f0,%f0,%f2 2038 add %l0,%o3,%l0 2039 2040 fmuld %f20,%f20,%f22 2041 add %l2,%o5,%l2 2042 2043 faddd %f14,%f16,%f14 2044 ldd [%l1+0x20],%f34 2045 2046 fmuld %f2,%f58,%f6 2047 ldd [%l3+%l0],%f32 2048 2049 fmuld %f22,%f58,%f26 2050 ldd [%l3+%l2],%f36 2051 2052 fmuld %f10,%f14,%f14 2053 ldd [%l1+0x30],%f16 2054 2055 faddd %f6,%f56,%f6 2056 fmuld %f2,%f62,%f4 2057 2058 faddd %f26,%f56,%f26 2059 fmuld %f22,%f62,%f24 2060 2061 faddd %f14,%f34,%f14 2062 ldd [%o4+x1_1],%f34 2063 2064 fmuld %f2,%f6,%f6 2065 faddd %f4,%f60,%f4 2066 2067 fmuld %f22,%f26,%f26 2068 faddd %f24,%f60,%f24 2069 2070 fmuld %f10,%f14,%f14 2071 2072 faddd %f6,%f54,%f6 2073 fmuld %f2,%f4,%f4 2074 ldd [%g1+%l0],%f2 2075 2076 faddd %f26,%f54,%f26 2077 fmuld %f22,%f24,%f24 2078 ldd [%g1+%l2],%f22 2079 2080 faddd %f14,%f16,%f14 2081 2082 fmuld %f0,%f6,%f6 2083 ldd [%l4+%l0],%f0 2084 2085 fmuld %f20,%f26,%f26 2086 ldd [%l4+%l2],%f20 2087 2088 fmuld %f4,%f32,%f4 2089 std %f12,[%fp+y1_0] 2090 2091 fmuld %f24,%f36,%f24 2092 2093 fmuld %f6,%f2,%f6 2094 2095 fmuld %f26,%f22,%f26 2096 2097 fmuld %f10,%f14,%f14 2098 2099 faddd %f6,%f4,%f6 2100 2101 faddd %f26,%f24,%f26 2102 2103 fmuld %f34,%f14,%f14 2104 ldd [%o4+y1_0],%f12 2105 2106 
faddd %f6,%f0,%f6 2107 2108 faddd %f26,%f20,%f26 2109 2110 faddd %f14,%f12,%f14 2111 2112 faddd %f6,%f32,%f6 2113 2114 faddd %f26,%f36,%f26 2115 ba,pt %icc,.FIXSIGN 2116 2117! delay slot 2118 faddd %f34,%f14,%f16 2119 2120 .align 32 2121.CASE3: 2122 fand %f8,%f44,%f4 2123 add %l3,8,%g1 2124 sub %l0,%o7,%l0 2125 2126 fmuld %f10,%f10,%f10 2127 ldd [%l5+%o4],%f34 2128 add %l5,%o4,%l1 2129 2130 fsubd %f0,%f4,%f0 2131 srl %l0,10,%l0 2132 2133 fmuld %f20,%f20,%f20 2134 ldd [%l5+%o5],%f36 2135 add %l5,%o5,%l2 2136 2137 fmuld %f10,%f34,%f14 2138 ldd [%l1+0x10],%f16 2139 add %fp,%o4,%o4 2140 2141 faddd %f0,%f2,%f0 2142 andn %l0,0x1f,%l0 2143 2144 fmuld %f20,%f36,%f24 2145 ldd [%l2+0x10],%f26 2146 add %fp,%o5,%o5 2147 2148 faddd %f14,%f16,%f14 2149 ldd [%l1+0x20],%f34 2150 2151 fmuld %f0,%f0,%f2 2152 add %l0,%o3,%l0 2153 2154 faddd %f24,%f26,%f24 2155 ldd [%l2+0x20],%f36 2156 2157 fmuld %f10,%f14,%f14 2158 ldd [%l1+0x30],%f16 2159 2160 fmuld %f2,%f58,%f6 2161 ldd [%l3+%l0],%f32 2162 2163 fmuld %f20,%f24,%f24 2164 ldd [%l2+0x30],%f26 2165 2166 faddd %f14,%f34,%f14 2167 ldd [%o4+x1_1],%f34 2168 2169 faddd %f6,%f56,%f6 2170 fmuld %f2,%f62,%f4 2171 2172 faddd %f24,%f36,%f24 2173 ldd [%o5+x2_1],%f36 2174 2175 fmuld %f10,%f14,%f14 2176 std %f12,[%fp+y1_0] 2177 2178 fmuld %f2,%f6,%f6 2179 faddd %f4,%f60,%f4 2180 2181 fmuld %f20,%f24,%f24 2182 std %f22,[%fp+y2_0] 2183 2184 faddd %f14,%f16,%f14 2185 2186 faddd %f6,%f54,%f6 2187 fmuld %f2,%f4,%f4 2188 ldd [%g1+%l0],%f2 2189 2190 faddd %f24,%f26,%f24 2191 2192 fmuld %f10,%f14,%f14 2193 2194 fmuld %f0,%f6,%f6 2195 ldd [%l4+%l0],%f0 2196 2197 fmuld %f4,%f32,%f4 2198 2199 fmuld %f20,%f24,%f24 2200 2201 fmuld %f6,%f2,%f6 2202 2203 fmuld %f34,%f14,%f14 2204 ldd [%o4+y1_0],%f12 2205 2206 fmuld %f36,%f24,%f24 2207 ldd [%o5+y2_0],%f22 2208 2209 faddd %f6,%f4,%f6 2210 2211 faddd %f14,%f12,%f14 2212 2213 faddd %f24,%f22,%f24 2214 2215 faddd %f6,%f0,%f6 2216 2217 faddd %f34,%f14,%f16 2218 2219 faddd %f36,%f24,%f26 2220 ba,pt %icc,.FIXSIGN 2221 
2222! delay slot 2223 faddd %f6,%f32,%f6 2224 2225 .align 32 2226.CASE4: 2227 fands %f29,%f28,%f29 ! if (n & 1) clear sign bit 2228 sethi %hi(0x3fc3c000),%o7 2229 andcc %l1,2,%g0 2230 bne,pn %icc,.CASE6 2231 2232! delay slot 2233 andcc %l2,2,%g0 2234 fpadd32s %f10,%f31,%f18 2235 ld [%fp+x1_1],%l1 2236 bne,pn %icc,.CASE5 2237 2238! delay slot 2239 add %l3,8,%g1 2240 ld [%fp+x2_1],%l2 2241 fpadd32s %f20,%f31,%f28 2242 2243 fand %f18,%f44,%f14 2244 sub %l1,%o7,%l1 2245 2246 fand %f28,%f44,%f24 2247 sub %l2,%o7,%l2 2248 2249 fsubd %f10,%f14,%f10 2250 srl %l1,10,%l1 2251 2252 fsubd %f20,%f24,%f20 2253 srl %l2,10,%l2 2254 2255 fmuld %f0,%f0,%f0 2256 ldd [%l5+%o3],%f32 2257 add %l5,%o3,%l0 2258 2259 faddd %f10,%f12,%f10 2260 andn %l1,0x1f,%l1 2261 2262 faddd %f20,%f22,%f20 2263 andn %l2,0x1f,%l2 2264 2265 fmuld %f0,%f32,%f4 2266 ldd [%l0+0x10],%f6 2267 add %fp,%o3,%o3 2268 2269 fmuld %f10,%f10,%f12 2270 add %l1,%o4,%l1 2271 2272 fmuld %f20,%f20,%f22 2273 add %l2,%o5,%l2 2274 2275 faddd %f4,%f6,%f4 2276 ldd [%l0+0x20],%f32 2277 2278 fmuld %f12,%f58,%f16 2279 ldd [%l3+%l1],%f34 2280 2281 fmuld %f22,%f58,%f26 2282 ldd [%l3+%l2],%f36 2283 2284 fmuld %f0,%f4,%f4 2285 ldd [%l0+0x30],%f6 2286 2287 faddd %f16,%f56,%f16 2288 fmuld %f12,%f62,%f14 2289 2290 faddd %f26,%f56,%f26 2291 fmuld %f22,%f62,%f24 2292 2293 faddd %f4,%f32,%f4 2294 ldd [%o3+x0_1],%f32 2295 2296 fmuld %f12,%f16,%f16 2297 faddd %f14,%f60,%f14 2298 2299 fmuld %f22,%f26,%f26 2300 faddd %f24,%f60,%f24 2301 2302 fmuld %f0,%f4,%f4 2303 2304 faddd %f16,%f54,%f16 2305 fmuld %f12,%f14,%f14 2306 ldd [%g1+%l1],%f12 2307 2308 faddd %f26,%f54,%f26 2309 fmuld %f22,%f24,%f24 2310 ldd [%g1+%l2],%f22 2311 2312 faddd %f4,%f6,%f4 2313 2314 fmuld %f10,%f16,%f16 2315 ldd [%l4+%l1],%f10 2316 2317 fmuld %f20,%f26,%f26 2318 ldd [%l4+%l2],%f20 2319 2320 fmuld %f14,%f34,%f14 2321 std %f2,[%fp+y0_0] 2322 2323 fmuld %f24,%f36,%f24 2324 2325 fmuld %f0,%f4,%f4 2326 2327 fmuld %f16,%f12,%f16 2328 2329 fmuld %f26,%f22,%f26 2330 2331 fmuld 
%f32,%f4,%f4 2332 ldd [%o3+y0_0],%f2 2333 2334 faddd %f16,%f14,%f16 2335 2336 faddd %f26,%f24,%f26 2337 2338 faddd %f4,%f2,%f4 2339 2340 faddd %f16,%f10,%f16 2341 2342 faddd %f26,%f20,%f26 2343 2344 faddd %f32,%f4,%f6 2345 2346 faddd %f16,%f34,%f16 2347 ba,pt %icc,.FIXSIGN 2348 2349! delay slot 2350 faddd %f26,%f36,%f26 2351 2352 .align 32 2353.CASE5: 2354 fand %f18,%f44,%f14 2355 sub %l1,%o7,%l1 2356 2357 fmuld %f0,%f0,%f0 2358 ldd [%l5+%o3],%f32 2359 add %l5,%o3,%l0 2360 2361 fsubd %f10,%f14,%f10 2362 srl %l1,10,%l1 2363 2364 fmuld %f20,%f20,%f20 2365 ldd [%l5+%o5],%f36 2366 add %l5,%o5,%l2 2367 2368 fmuld %f0,%f32,%f4 2369 ldd [%l0+0x10],%f6 2370 add %fp,%o3,%o3 2371 2372 faddd %f10,%f12,%f10 2373 andn %l1,0x1f,%l1 2374 2375 fmuld %f20,%f36,%f24 2376 ldd [%l2+0x10],%f26 2377 add %fp,%o5,%o5 2378 2379 faddd %f4,%f6,%f4 2380 ldd [%l0+0x20],%f32 2381 2382 fmuld %f10,%f10,%f12 2383 add %l1,%o4,%l1 2384 2385 faddd %f24,%f26,%f24 2386 ldd [%l2+0x20],%f36 2387 2388 fmuld %f0,%f4,%f4 2389 ldd [%l0+0x30],%f6 2390 2391 fmuld %f12,%f58,%f16 2392 ldd [%l3+%l1],%f34 2393 2394 fmuld %f20,%f24,%f24 2395 ldd [%l2+0x30],%f26 2396 2397 faddd %f4,%f32,%f4 2398 ldd [%o3+x0_1],%f32 2399 2400 faddd %f16,%f56,%f16 2401 fmuld %f12,%f62,%f14 2402 2403 faddd %f24,%f36,%f24 2404 ldd [%o5+x2_1],%f36 2405 2406 fmuld %f0,%f4,%f4 2407 std %f2,[%fp+y0_0] 2408 2409 fmuld %f12,%f16,%f16 2410 faddd %f14,%f60,%f14 2411 2412 fmuld %f20,%f24,%f24 2413 std %f22,[%fp+y2_0] 2414 2415 faddd %f4,%f6,%f4 2416 2417 faddd %f16,%f54,%f16 2418 fmuld %f12,%f14,%f14 2419 ldd [%g1+%l1],%f12 2420 2421 faddd %f24,%f26,%f24 2422 2423 fmuld %f0,%f4,%f4 2424 2425 fmuld %f10,%f16,%f16 2426 ldd [%l4+%l1],%f10 2427 2428 fmuld %f14,%f34,%f14 2429 2430 fmuld %f20,%f24,%f24 2431 2432 fmuld %f16,%f12,%f16 2433 2434 fmuld %f32,%f4,%f4 2435 ldd [%o3+y0_0],%f2 2436 2437 fmuld %f36,%f24,%f24 2438 ldd [%o5+y2_0],%f22 2439 2440 faddd %f16,%f14,%f16 2441 2442 faddd %f4,%f2,%f4 2443 2444 faddd %f24,%f22,%f24 2445 2446 faddd 
%f16,%f10,%f16 2447 2448 faddd %f32,%f4,%f6 2449 2450 faddd %f36,%f24,%f26 2451 ba,pt %icc,.FIXSIGN 2452 2453! delay slot 2454 faddd %f16,%f34,%f16 2455 2456 .align 32 2457.CASE6: 2458 ld [%fp+x2_1],%l2 2459 add %l3,8,%g1 2460 bne,pn %icc,.CASE7 2461! delay slot 2462 fpadd32s %f20,%f31,%f28 2463 2464 fand %f28,%f44,%f24 2465 ldd [%l5+%o3],%f32 2466 add %l5,%o3,%l0 2467 2468 fmuld %f0,%f0,%f0 2469 sub %l2,%o7,%l2 2470 2471 fsubd %f20,%f24,%f20 2472 srl %l2,10,%l2 2473 2474 fmuld %f10,%f10,%f10 2475 ldd [%l5+%o4],%f34 2476 add %l5,%o4,%l1 2477 2478 fmuld %f0,%f32,%f4 2479 ldd [%l0+0x10],%f6 2480 add %fp,%o3,%o3 2481 2482 faddd %f20,%f22,%f20 2483 andn %l2,0x1f,%l2 2484 2485 fmuld %f10,%f34,%f14 2486 ldd [%l1+0x10],%f16 2487 add %fp,%o4,%o4 2488 2489 faddd %f4,%f6,%f4 2490 ldd [%l0+0x20],%f32 2491 2492 fmuld %f20,%f20,%f22 2493 add %l2,%o5,%l2 2494 2495 faddd %f14,%f16,%f14 2496 ldd [%l1+0x20],%f34 2497 2498 fmuld %f0,%f4,%f4 2499 ldd [%l0+0x30],%f6 2500 2501 fmuld %f22,%f58,%f26 2502 ldd [%l3+%l2],%f36 2503 2504 fmuld %f10,%f14,%f14 2505 ldd [%l1+0x30],%f16 2506 2507 faddd %f4,%f32,%f4 2508 ldd [%o3+x0_1],%f32 2509 2510 faddd %f26,%f56,%f26 2511 fmuld %f22,%f62,%f24 2512 2513 faddd %f14,%f34,%f14 2514 ldd [%o4+x1_1],%f34 2515 2516 fmuld %f0,%f4,%f4 2517 std %f2,[%fp+y0_0] 2518 2519 fmuld %f22,%f26,%f26 2520 faddd %f24,%f60,%f24 2521 2522 fmuld %f10,%f14,%f14 2523 std %f12,[%fp+y1_0] 2524 2525 faddd %f4,%f6,%f4 2526 2527 faddd %f26,%f54,%f26 2528 fmuld %f22,%f24,%f24 2529 ldd [%g1+%l2],%f22 2530 2531 faddd %f14,%f16,%f14 2532 2533 fmuld %f0,%f4,%f4 2534 2535 fmuld %f20,%f26,%f26 2536 ldd [%l4+%l2],%f20 2537 2538 fmuld %f24,%f36,%f24 2539 2540 fmuld %f10,%f14,%f14 2541 2542 fmuld %f26,%f22,%f26 2543 2544 fmuld %f32,%f4,%f4 2545 ldd [%o3+y0_0],%f2 2546 2547 fmuld %f34,%f14,%f14 2548 ldd [%o4+y1_0],%f12 2549 2550 faddd %f26,%f24,%f26 2551 2552 faddd %f4,%f2,%f4 2553 2554 faddd %f14,%f12,%f14 2555 2556 faddd %f26,%f20,%f26 2557 2558 faddd %f32,%f4,%f6 2559 2560 faddd 
%f34,%f14,%f16 2561 ba,pt %icc,.FIXSIGN 2562 2563! delay slot 2564 faddd %f26,%f36,%f26 2565 2566 .align 32 2567.CASE7: 2568 fmuld %f0,%f0,%f0 2569 ldd [%l5+%o3],%f32 2570 add %l5,%o3,%l0 2571 2572 fmuld %f10,%f10,%f10 2573 ldd [%l5+%o4],%f34 2574 add %l5,%o4,%l1 2575 2576 fmuld %f20,%f20,%f20 2577 ldd [%l5+%o5],%f36 2578 add %l5,%o5,%l2 2579 2580 fmuld %f0,%f32,%f4 2581 ldd [%l0+0x10],%f6 2582 add %fp,%o3,%o3 2583 2584 fmuld %f10,%f34,%f14 2585 ldd [%l1+0x10],%f16 2586 add %fp,%o4,%o4 2587 2588 fmuld %f20,%f36,%f24 2589 ldd [%l2+0x10],%f26 2590 add %fp,%o5,%o5 2591 2592 faddd %f4,%f6,%f4 2593 ldd [%l0+0x20],%f32 2594 2595 faddd %f14,%f16,%f14 2596 ldd [%l1+0x20],%f34 2597 2598 faddd %f24,%f26,%f24 2599 ldd [%l2+0x20],%f36 2600 2601 fmuld %f0,%f4,%f4 2602 ldd [%l0+0x30],%f6 2603 2604 fmuld %f10,%f14,%f14 2605 ldd [%l1+0x30],%f16 2606 2607 fmuld %f20,%f24,%f24 2608 ldd [%l2+0x30],%f26 2609 2610 faddd %f4,%f32,%f4 2611 ldd [%o3+x0_1],%f32 2612 2613 faddd %f14,%f34,%f14 2614 ldd [%o4+x1_1],%f34 2615 2616 faddd %f24,%f36,%f24 2617 ldd [%o5+x2_1],%f36 2618 2619 fmuld %f0,%f4,%f4 2620 std %f2,[%fp+y0_0] 2621 2622 fmuld %f10,%f14,%f14 2623 std %f12,[%fp+y1_0] 2624 2625 fmuld %f20,%f24,%f24 2626 std %f22,[%fp+y2_0] 2627 2628 faddd %f4,%f6,%f4 2629 2630 faddd %f14,%f16,%f14 2631 2632 faddd %f24,%f26,%f24 2633 2634 fmuld %f0,%f4,%f4 2635 2636 fmuld %f10,%f14,%f14 2637 2638 fmuld %f20,%f24,%f24 2639 2640 fmuld %f32,%f4,%f4 2641 ldd [%o3+y0_0],%f2 2642 2643 fmuld %f34,%f14,%f14 2644 ldd [%o4+y1_0],%f12 2645 2646 fmuld %f36,%f24,%f24 2647 ldd [%o5+y2_0],%f22 2648 2649 faddd %f4,%f2,%f4 2650 2651 faddd %f14,%f12,%f14 2652 2653 faddd %f24,%f22,%f24 2654 2655 faddd %f32,%f4,%f6 2656 2657 faddd %f34,%f14,%f16 2658 ba,pt %icc,.FIXSIGN 2659 2660! delay slot 2661 faddd %f36,%f24,%f26 2662 2663 2664 .align 32 2665.ENDLOOP2: 2666 fmuld %f10,%f40,%f12 2667 add %l5,thresh,%g1 2668 faddd %f12,%f42,%f12 2669 st %f13,[%fp+n1] 2670 fsubd %f12,%f42,%f12 ! 
n 2671 fmuld %f12,%f46,%f14 2672 fsubd %f10,%f14,%f14 2673 fmuld %f12,%f48,%f16 2674 fsubd %f14,%f16,%f10 2675 ld [%fp+n1],%o4 2676 fsubd %f14,%f10,%f34 2677 and %o4,1,%o4 2678 fsubd %f34,%f16,%f34 2679 fmuld %f12,%f50,%f18 2680 sll %o4,3,%o4 2681 fsubd %f18,%f34,%f18 2682 ld [%g1+%o4],%f16 2683 fsubd %f10,%f18,%f14 2684 fsubd %f10,%f14,%f34 2685 add %l5,thresh+4,%o7 2686 fsubd %f34,%f18,%f34 2687 fmuld %f12,%f52,%f12 2688 fsubd %f12,%f34,%f12 2689 ld [%o7+%o4],%f18 2690 fsubd %f14,%f12,%f10 ! x 2691 fsubd %f14,%f10,%f14 2692 fands %f10,%f30,%f19 ! save signbit 2693 fabsd %f10,%f10 2694 std %f10,[%fp+x1_1] 2695 fsubd %f14,%f12,%f12 ! y 2696 fcmpgt32 %f16,%f10,%l1 2697 fxors %f12,%f19,%f12 2698 fands %f19,%f18,%f19 ! if (n & 1) clear sign bit 2699 andcc %l1,2,%g0 2700 bne,pn %icc,1f 2701! delay slot 2702 nop 2703 fpadd32s %f10,%f31,%f18 2704 ld [%fp+x1_1],%l1 2705 fand %f18,%f44,%f14 2706 sethi %hi(0x3fc3c000),%o7 2707 add %l3,8,%g1 2708 fsubd %f10,%f14,%f10 2709 sub %l1,%o7,%l1 2710 srl %l1,10,%l1 2711 faddd %f10,%f12,%f10 2712 andn %l1,0x1f,%l1 2713 fmuld %f10,%f10,%f12 2714 add %l1,%o4,%l1 2715 fmuld %f12,%f58,%f16 2716 ldd [%l3+%l1],%f34 2717 faddd %f16,%f56,%f16 2718 fmuld %f12,%f62,%f14 2719 fmuld %f12,%f16,%f16 2720 faddd %f14,%f60,%f14 2721 faddd %f16,%f54,%f16 2722 fmuld %f12,%f14,%f14 2723 ldd [%g1+%l1],%f12 2724 fmuld %f10,%f16,%f16 2725 ldd [%l4+%l1],%f10 2726 fmuld %f14,%f34,%f14 2727 fmuld %f16,%f12,%f16 2728 faddd %f16,%f14,%f16 2729 faddd %f16,%f10,%f16 2730 ba,pt %icc,2f 2731 faddd %f16,%f34,%f16 27321: 2733 fmuld %f10,%f10,%f10 2734 ldd [%l5+%o4],%f34 2735 add %l5,%o4,%l1 2736 fmuld %f10,%f34,%f14 2737 ldd [%l1+0x10],%f16 2738 add %fp,%o4,%o4 2739 faddd %f14,%f16,%f14 2740 ldd [%l1+0x20],%f34 2741 fmuld %f10,%f14,%f14 2742 ldd [%l1+0x30],%f16 2743 faddd %f14,%f34,%f14 2744 ldd [%o4+x1_1],%f34 2745 fmuld %f10,%f14,%f14 2746 std %f12,[%fp+y1_0] 2747 faddd %f14,%f16,%f14 2748 fmuld %f10,%f14,%f14 2749 fmuld %f34,%f14,%f14 2750 ldd [%o4+y1_0],%f12 2751 
faddd %f14,%f12,%f14 2752 faddd %f34,%f14,%f16 27532: 2754 add %l5,thresh-4,%g1 2755 ld [%fp+n1],%o4 2756 and %o4,2,%o4 2757 sll %o4,2,%o4 2758 ld [%g1+%o4],%f18 2759 fxors %f19,%f18,%f19 2760 fors %f16,%f19,%f16 ! tack on sign 2761 st %f16,[%o1] 2762 st %f17,[%o1+4] 2763 2764.ENDLOOP1: 2765 fmuld %f0,%f40,%f2 2766 add %l5,thresh,%g1 2767 faddd %f2,%f42,%f2 2768 st %f3,[%fp+n0] 2769 fsubd %f2,%f42,%f2 ! n 2770 fmuld %f2,%f46,%f4 2771 fsubd %f0,%f4,%f4 2772 fmuld %f2,%f48,%f6 2773 fsubd %f4,%f6,%f0 2774 ld [%fp+n0],%o3 2775 fsubd %f4,%f0,%f32 2776 and %o3,1,%o3 2777 fsubd %f32,%f6,%f32 2778 fmuld %f2,%f50,%f8 2779 sll %o3,3,%o3 2780 fsubd %f8,%f32,%f8 2781 ld [%g1+%o3],%f6 2782 fsubd %f0,%f8,%f4 2783 fsubd %f0,%f4,%f32 2784 add %l5,thresh+4,%o7 2785 fsubd %f32,%f8,%f32 2786 fmuld %f2,%f52,%f2 2787 fsubd %f2,%f32,%f2 2788 ld [%o7+%o3],%f8 2789 fsubd %f4,%f2,%f0 ! x 2790 fsubd %f4,%f0,%f4 2791 fands %f0,%f30,%f9 ! save signbit 2792 fabsd %f0,%f0 2793 std %f0,[%fp+x0_1] 2794 fsubd %f4,%f2,%f2 ! y 2795 fcmpgt32 %f6,%f0,%l0 2796 fxors %f2,%f9,%f2 2797 fands %f9,%f8,%f9 ! if (n & 1) clear sign bit 2798 andcc %l0,2,%g0 2799 bne,pn %icc,1f 2800! 
delay slot 2801 nop 2802 fpadd32s %f0,%f31,%f8 2803 ld [%fp+x0_1],%l0 2804 fand %f8,%f44,%f4 2805 sethi %hi(0x3fc3c000),%o7 2806 add %l3,8,%g1 2807 fsubd %f0,%f4,%f0 2808 sub %l0,%o7,%l0 2809 srl %l0,10,%l0 2810 faddd %f0,%f2,%f0 2811 andn %l0,0x1f,%l0 2812 fmuld %f0,%f0,%f2 2813 add %l0,%o3,%l0 2814 fmuld %f2,%f58,%f6 2815 ldd [%l3+%l0],%f32 2816 faddd %f6,%f56,%f6 2817 fmuld %f2,%f62,%f4 2818 fmuld %f2,%f6,%f6 2819 faddd %f4,%f60,%f4 2820 faddd %f6,%f54,%f6 2821 fmuld %f2,%f4,%f4 2822 ldd [%g1+%l0],%f2 2823 fmuld %f0,%f6,%f6 2824 ldd [%l4+%l0],%f0 2825 fmuld %f4,%f32,%f4 2826 fmuld %f6,%f2,%f6 2827 faddd %f6,%f4,%f6 2828 faddd %f6,%f0,%f6 2829 ba,pt %icc,2f 2830 faddd %f6,%f32,%f6 28311: 2832 fmuld %f0,%f0,%f0 2833 ldd [%l5+%o3],%f32 2834 add %l5,%o3,%l0 2835 fmuld %f0,%f32,%f4 2836 ldd [%l0+0x10],%f6 2837 add %fp,%o3,%o3 2838 faddd %f4,%f6,%f4 2839 ldd [%l0+0x20],%f32 2840 fmuld %f0,%f4,%f4 2841 ldd [%l0+0x30],%f6 2842 faddd %f4,%f32,%f4 2843 ldd [%o3+x0_1],%f32 2844 fmuld %f0,%f4,%f4 2845 std %f2,[%fp+y0_0] 2846 faddd %f4,%f6,%f4 2847 fmuld %f0,%f4,%f4 2848 fmuld %f32,%f4,%f4 2849 ldd [%o3+y0_0],%f2 2850 faddd %f4,%f2,%f4 2851 faddd %f32,%f4,%f6 28522: 2853 add %l5,thresh-4,%g1 2854 ld [%fp+n0],%o3 2855 and %o3,2,%o3 2856 sll %o3,2,%o3 2857 ld [%g1+%o3],%f8 2858 fxors %f9,%f8,%f9 2859 fors %f6,%f9,%f6 ! tack on sign 2860 st %f6,[%o0] 2861 st %f7,[%o0+4] 2862 2863.ENDLOOP0: 2864 2865! check for huge arguments remaining 2866 2867 tst LIM_l6 2868 be,pt %icc,.exit 2869! delay slot 2870 nop 2871 2872! ========== huge range (use C code) ========== 2873 2874#ifdef __sparcv9 2875 ldx [%fp+xsave],%o1 2876 ldx [%fp+ysave],%o3 2877#else 2878 ld [%fp+xsave],%o1 2879 ld [%fp+ysave],%o3 2880#endif 2881 ld [%fp+nsave],%o0 2882 ld [%fp+sxsave],%o2 2883 ld [%fp+sysave],%o4 2884 sra %o2,0,%o2 ! sign-extend for V9 2885 sra %o4,0,%o4 2886 call __vlibm_vsin_big 2887 mov %l7,%o5 ! 
! delay slot


! Common exit: return to the caller.  The restore in the delay slot of
! ret undoes the save executed at function entry.
.exit:
	ret
	restore


! .SKIP0 / .SKIP1 / .SKIP2
!
! Out-of-line paths that drop the element held in pipeline slot 0, 1
! or 2.  Nothing is computed or stored for the dropped element here.
! The branches that lead to these labels are not part of this excerpt
! (presumably taken for elements that need no further work -- TODO
! confirm against the .LOOPn code).
!
! Each path decrements the remaining count in %i0.  If the count is
! exhausted, control goes to .ENDLOOPn, which finishes the lower
! numbered slots still in flight.  Otherwise y is advanced, slot n is
! refilled and the main loop is re-entered at .LOOPn with x advanced in
! the delay slot of the branch.
!
! Refill of slot 0 and slot 1 reuses the argument already staged for
! the next higher slot: hx is copied with the sign bit cleared
! (%i5 holds 0x80000000), the high word is copied between float
! registers and the low word is loaded from [%i1+4].  Slot 2 is loaded
! from memory in full.

	.align 32
.SKIP0:
	addcc %i0,-1,%i0			! count down; sets icc for the ble
	ble,pn %icc,.ENDLOOP0
! delay slot, harmless if branch taken
	add %i3,%i4,%i3 ! y += stridey
	andn %l1,%i5,%l0 ! hx &= ~0x80000000
	fmovs %f10,%f0				! high word of the staged slot 1 argument
	ld [%i1+4],%f1				! low word of the same argument
	ba,pt %icc,.LOOP0
! delay slot
	add %i1,%i2,%i1 ! x += stridex


	.align 32
.SKIP1:
	addcc %i0,-1,%i0			! count down; sets icc for the ble
	ble,pn %icc,.ENDLOOP1
! delay slot, harmless if branch taken
	add %i3,%i4,%i3 ! y += stridey
	andn %l2,%i5,%l1 ! hx &= ~0x80000000
	fmovs %f20,%f10				! high word of the staged slot 2 argument
	ld [%i1+4],%f11				! low word of the same argument
	ba,pt %icc,.LOOP1
! delay slot
	add %i1,%i2,%i1 ! x += stridex


	.align 32
.SKIP2:
	addcc %i0,-1,%i0			! count down; sets icc for the ble
	ble,pn %icc,.ENDLOOP2
! delay slot, harmless if branch taken
	add %i3,%i4,%i3 ! y += stridey
	ld [%i1],%l2				! high word as integer, for range tests
	ld [%i1],%f20				! same high word into the float register
	ld [%i1+4],%f21				! low word
	andn %l2,%i5,%l2 ! hx &= ~0x80000000
	ba,pt %icc,.LOOP2
! delay slot
	add %i1,%i2,%i1 ! x += stridex


! .BIG0 / .BIG1 / .BIG2
!
! Out-of-line paths for an argument in slot 0, 1 or 2 whose hx is above
! the limit handled by the inline reduction (see the bg,pn to .BIG2 in
! .LOOP2, commented "if hx > 0x413921fb").
!
!   hx <  0x7ff00000: finite.  The annulling branch is taken and its
!                     delay slot copies %l7 into LIM_l6.  A non-zero
!                     LIM_l6 is what the test at .ENDLOOP0 uses to
!                     decide whether __vlibm_vsin_big must be called.
!                     No result is stored for this element here.
!   hx >= 0x7ff00000: infinity or NaN.  The branch falls through with
!                     the delay slot annulled, and x - x (a NaN in both
!                     cases) is stored through the output pointer of
!                     the slot (%o0, %o1 or %o2).
!
! Both cases then meet at local label 1, which counts down %i0 and
! either leaves through .ENDLOOPn or refills slot n and re-enters
! .LOOPn.  No y += stridey is done on these paths; .LOOP2 advances y
! before branching to .BIG2 (the .LOOP0 and .LOOP1 code is outside this
! excerpt -- presumably the same, TODO confirm).

	.align 32
.BIG0:
	sethi %hi(0x7ff00000),%o7		! smallest hx with an all-ones exponent
	cmp %l0,%o7
	bl,a,pt %icc,1f ! if hx < 0x7ff00000
! delay slot, annulled if branch not taken
	mov %l7,LIM_l6 ! set biguns flag or
	fsubd %f0,%f0,%f0 ! y = x - x
	st %f0,[%o0]
	st %f1,[%o0+4]
1:
	addcc %i0,-1,%i0			! count down; sets icc for the ble
	ble,pn %icc,.ENDLOOP0
! delay slot, harmless if branch taken
	andn %l1,%i5,%l0 ! hx &= ~0x80000000
	fmovd %f10,%f0				! staged slot 1 argument becomes slot 0
	ba,pt %icc,.LOOP0
! delay slot
	add %i1,%i2,%i1 ! x += stridex


	.align 32
.BIG1:
	sethi %hi(0x7ff00000),%o7		! smallest hx with an all-ones exponent
	cmp %l1,%o7
	bl,a,pt %icc,1f ! if hx < 0x7ff00000
! delay slot, annulled if branch not taken
	mov %l7,LIM_l6 ! set biguns flag or
	fsubd %f10,%f10,%f10 ! y = x - x
	st %f10,[%o1]
	st %f11,[%o1+4]
1:
	addcc %i0,-1,%i0			! count down; sets icc for the ble
	ble,pn %icc,.ENDLOOP1
! delay slot, harmless if branch taken
	andn %l2,%i5,%l1 ! hx &= ~0x80000000
	fmovd %f20,%f10				! staged slot 2 argument becomes slot 1
	ba,pt %icc,.LOOP1
! delay slot
	add %i1,%i2,%i1 ! x += stridex


	.align 32
.BIG2:
	sethi %hi(0x7ff00000),%o7		! smallest hx with an all-ones exponent
	cmp %l2,%o7
	bl,a,pt %icc,1f ! if hx < 0x7ff00000
! delay slot, annulled if branch not taken
	mov %l7,LIM_l6 ! set biguns flag or
	fsubd %f20,%f20,%f20 ! y = x - x
	st %f20,[%o2]
	st %f21,[%o2+4]
1:
	addcc %i0,-1,%i0			! count down; sets icc for the ble
	ble,pn %icc,.ENDLOOP2
! delay slot
	nop
	ld [%i1],%l2				! high word as integer, for range tests
	ld [%i1],%f20				! same high word into the float register
	ld [%i1+4],%f21				! low word
	andn %l2,%i5,%l2 ! hx &= ~0x80000000
	ba,pt %icc,.LOOP2
! delay slot
	add %i1,%i2,%i1 ! x += stridex

! End of __vsin; pairs with ENTRY(__vsin) at the top of the function.
	SET_SIZE(__vsin)
