1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23 */ 24/* 25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 .file "__vsin_ultra3.S" 30 31#include "libm.h" 32#if defined(LIBMVEC_SO_BUILD) 33 .weak __vsin 34 .type __vsin,#function 35 __vsin = __vsin_ultra3 36#endif 37 38 RO_DATA 39 .align 64 40constants: 41 .word 0x42c80000,0x00000000 ! 3 * 2^44 42 .word 0x43380000,0x00000000 ! 3 * 2^51 43 .word 0x3fe45f30,0x6dc9c883 ! invpio2 44 .word 0x3ff921fb,0x54442c00 ! pio2_1 45 .word 0x3d318469,0x898cc400 ! pio2_2 46 .word 0x3a71701b,0x839a2520 ! pio2_3 47 .word 0xbfc55555,0x55555533 ! pp1 48 .word 0x3f811111,0x10e7d53b ! pp2 49 .word 0xbf2a0167,0xe6b3cf9b ! pp3 50 .word 0xbfdfffff,0xffffff65 ! qq1 51 .word 0x3fa55555,0x54f88ed0 ! qq2 52 .word 0xbf56c12c,0xdd185f60 ! qq3 53 54! 
local storage indices 55 56#define xsave STACK_BIAS-0x8 57#define ysave STACK_BIAS-0x10 58#define nsave STACK_BIAS-0x14 59#define sxsave STACK_BIAS-0x18 60#define sysave STACK_BIAS-0x1c 61#define biguns STACK_BIAS-0x20 62#define nk3 STACK_BIAS-0x24 63#define nk2 STACK_BIAS-0x28 64#define nk1 STACK_BIAS-0x2c 65#define nk0 STACK_BIAS-0x30 66#define junk STACK_BIAS-0x38 67! sizeof temp storage - must be a multiple of 16 for V9 68#define tmps 0x40 69 70! register use 71 72! i0 n 73! i1 x 74! i2 stridex 75! i3 y 76! i4 stridey 77! i5 0x80000000 78 79! l0 hx0 80! l1 hx1 81! l2 hx2 82! l3 hx3 83! l4 k0 84! l5 k1 85! l6 k2 86! l7 k3 87 88! the following are 64-bit registers in both V8+ and V9 89 90! g1 __vlibm_TBL_sincos2 91! g5 scratch 92 93! o0 py0 94! o1 py1 95! o2 py2 96! o3 py3 97! o4 0x3e400000 98! o5 0x3fe921fb,0x4099251e 99! o7 scratch 100 101! f0 hx0 102! f2 103! f4 104! f6 105! f8 hx1 106! f10 107! f12 108! f14 109! f16 hx2 110! f18 111! f20 112! f22 113! f24 hx3 114! f26 115! f28 116! f30 117! f32 118! f34 119! f36 120! f38 121 122#define c3two44 %f40 123#define c3two51 %f42 124#define invpio2 %f44 125#define pio2_1 %f46 126#define pio2_2 %f48 127#define pio2_3 %f50 128#define pp1 %f52 129#define pp2 %f54 130#define pp3 %f56 131#define qq1 %f58 132#define qq2 %f60 133#define qq3 %f62 134 135 ENTRY(__vsin_ultra3) 136 save %sp,-SA(MINFRAME)-tmps,%sp 137 PIC_SETUP(l7) 138 PIC_SET(l7,constants,o0) 139 PIC_SET(l7,__vlibm_TBL_sincos2,o1) 140 mov %o1,%g1 141 wr %g0,0x82,%asi ! set %asi for non-faulting loads 142#ifdef __sparcv9 143 stx %i1,[%fp+xsave] ! save arguments 144 stx %i3,[%fp+ysave] 145#else 146 st %i1,[%fp+xsave] ! save arguments 147 st %i3,[%fp+ysave] 148#endif 149 st %i0,[%fp+nsave] 150 st %i2,[%fp+sxsave] 151 st %i4,[%fp+sysave] 152 st %g0,[%fp+biguns] ! biguns = 0 153 ldd [%o0+0x00],c3two44 ! 
load/set up constants 154 ldd [%o0+0x08],c3two51 155 ldd [%o0+0x10],invpio2 156 ldd [%o0+0x18],pio2_1 157 ldd [%o0+0x20],pio2_2 158 ldd [%o0+0x28],pio2_3 159 ldd [%o0+0x30],pp1 160 ldd [%o0+0x38],pp2 161 ldd [%o0+0x40],pp3 162 ldd [%o0+0x48],qq1 163 ldd [%o0+0x50],qq2 164 ldd [%o0+0x58],qq3 165 sethi %hi(0x80000000),%i5 166 sethi %hi(0x3e400000),%o4 167 sethi %hi(0x3fe921fb),%o5 168 or %o5,%lo(0x3fe921fb),%o5 169 sllx %o5,32,%o5 170 sethi %hi(0x4099251e),%o7 171 or %o7,%lo(0x4099251e),%o7 172 or %o5,%o7,%o5 173 sll %i2,3,%i2 ! scale strides 174 sll %i4,3,%i4 175 add %fp,junk,%o1 ! loop prologue 176 add %fp,junk,%o2 177 add %fp,junk,%o3 178 ld [%i1],%l0 ! *x 179 ld [%i1],%f0 180 ld [%i1+4],%f3 181 andn %l0,%i5,%l0 ! mask off sign 182 ba .loop0 183 add %i1,%i2,%i1 ! x += stridex 184 185! 16-byte aligned 186 .align 16 187.loop0: 188 lda [%i1]%asi,%l1 ! preload next argument 189 sub %l0,%o4,%g5 190 sub %o5,%l0,%o7 191 fabss %f0,%f2 192 193 lda [%i1]%asi,%f8 194 orcc %o7,%g5,%g0 195 mov %i3,%o0 ! py0 = y 196 bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e 197 198! delay slot 199 lda [%i1+4]%asi,%f11 200 addcc %i0,-1,%i0 201 add %i3,%i4,%i3 ! y += stridey 202 ble,pn %icc,.last1 203 204! delay slot 205 andn %l1,%i5,%l1 206 add %i1,%i2,%i1 ! x += stridex 207 faddd %f2,c3two44,%f4 208 st %f15,[%o1+4] 209 210.loop1: 211 lda [%i1]%asi,%l2 ! preload next argument 212 sub %l1,%o4,%g5 213 sub %o5,%l1,%o7 214 fabss %f8,%f10 215 216 lda [%i1]%asi,%f16 217 orcc %o7,%g5,%g0 218 mov %i3,%o1 ! py1 = y 219 bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e 220 221! delay slot 222 lda [%i1+4]%asi,%f19 223 addcc %i0,-1,%i0 224 add %i3,%i4,%i3 ! y += stridey 225 ble,pn %icc,.last2 226 227! delay slot 228 andn %l2,%i5,%l2 229 add %i1,%i2,%i1 ! x += stridex 230 faddd %f10,c3two44,%f12 231 st %f23,[%o2+4] 232 233.loop2: 234 lda [%i1]%asi,%l3 ! 
preload next argument 235 sub %l2,%o4,%g5 236 sub %o5,%l2,%o7 237 fabss %f16,%f18 238 239 lda [%i1]%asi,%f24 240 orcc %o7,%g5,%g0 241 mov %i3,%o2 ! py2 = y 242 bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e 243 244! delay slot 245 lda [%i1+4]%asi,%f27 246 addcc %i0,-1,%i0 247 add %i3,%i4,%i3 ! y += stridey 248 ble,pn %icc,.last3 249 250! delay slot 251 andn %l3,%i5,%l3 252 add %i1,%i2,%i1 ! x += stridex 253 faddd %f18,c3two44,%f20 254 st %f31,[%o3+4] 255 256.loop3: 257 sub %l3,%o4,%g5 258 sub %o5,%l3,%o7 259 fabss %f24,%f26 260 st %f5,[%fp+nk0] 261 262 orcc %o7,%g5,%g0 263 mov %i3,%o3 ! py3 = y 264 bl,pn %icc,.range3 ! hx < 0x3e400000 or > hx 0x4099251e 265! delay slot 266 st %f13,[%fp+nk1] 267 268!!! DONE? 269.cont: 270 srlx %o5,32,%o7 271 add %i3,%i4,%i3 ! y += stridey 272 fmovs %f3,%f1 273 st %f21,[%fp+nk2] 274 275 sub %o7,%l0,%l0 276 sub %o7,%l1,%l1 277 faddd %f26,c3two44,%f28 278 st %f29,[%fp+nk3] 279 280 sub %o7,%l2,%l2 281 sub %o7,%l3,%l3 282 fmovs %f11,%f9 283 284 or %l0,%l1,%l0 285 or %l2,%l3,%l2 286 fmovs %f19,%f17 287 288 fmovs %f27,%f25 289 fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range 290 291 fmuld %f8,invpio2,%f14 292 ld [%fp+nk0],%l4 293 294 fmuld %f16,invpio2,%f22 295 ld [%fp+nk1],%l5 296 297 orcc %l0,%l2,%g0 298 bl,pn %icc,.medium 299! delay slot 300 fmuld %f24,invpio2,%f30 301 ld [%fp+nk2],%l6 302 303 ld [%fp+nk3],%l7 304 sll %l4,5,%l4 ! k 305 fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0 306 307 sll %l5,5,%l5 308 ldd [%l4+%g1],%f4 309 fcmpd %fcc1,%f8,pio2_3 310 311 sll %l6,5,%l6 312 ldd [%l5+%g1],%f12 313 fcmpd %fcc2,%f16,pio2_3 314 315 sll %l7,5,%l7 316 ldd [%l6+%g1],%f20 317 fcmpd %fcc3,%f24,pio2_3 318 319 ldd [%l7+%g1],%f28 320 fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] 321 322 fsubd %f10,%f12,%f10 323 324 fsubd %f18,%f20,%f18 325 326 fsubd %f26,%f28,%f26 327 328 fmuld %f2,%f2,%f0 ! 
z = x * x 329 330 fmuld %f10,%f10,%f8 331 332 fmuld %f18,%f18,%f16 333 334 fmuld %f26,%f26,%f24 335 336 fmuld %f0,pp3,%f6 337 338 fmuld %f8,pp3,%f14 339 340 fmuld %f16,pp3,%f22 341 342 fmuld %f24,pp3,%f30 343 344 faddd %f6,pp2,%f6 345 fmuld %f0,qq2,%f4 346 347 faddd %f14,pp2,%f14 348 fmuld %f8,qq2,%f12 349 350 faddd %f22,pp2,%f22 351 fmuld %f16,qq2,%f20 352 353 faddd %f30,pp2,%f30 354 fmuld %f24,qq2,%f28 355 356 fmuld %f0,%f6,%f6 357 faddd %f4,qq1,%f4 358 359 fmuld %f8,%f14,%f14 360 faddd %f12,qq1,%f12 361 362 fmuld %f16,%f22,%f22 363 faddd %f20,qq1,%f20 364 365 fmuld %f24,%f30,%f30 366 faddd %f28,qq1,%f28 367 368 faddd %f6,pp1,%f6 369 fmuld %f0,%f4,%f4 370 add %l4,%g1,%l4 371 372 faddd %f14,pp1,%f14 373 fmuld %f8,%f12,%f12 374 add %l5,%g1,%l5 375 376 faddd %f22,pp1,%f22 377 fmuld %f16,%f20,%f20 378 add %l6,%g1,%l6 379 380 faddd %f30,pp1,%f30 381 fmuld %f24,%f28,%f28 382 add %l7,%g1,%l7 383 384 fmuld %f0,%f6,%f6 385 ldd [%l4+8],%f0 386 387 fmuld %f8,%f14,%f14 388 ldd [%l5+8],%f8 389 390 fmuld %f16,%f22,%f22 391 ldd [%l6+8],%f16 392 393 fmuld %f24,%f30,%f30 394 ldd [%l7+8],%f24 395 396 fmuld %f2,%f6,%f6 397 398 fmuld %f10,%f14,%f14 399 400 fmuld %f18,%f22,%f22 401 402 fmuld %f26,%f30,%f30 403 404 faddd %f6,%f2,%f6 405 fmuld %f0,%f4,%f4 406 ldd [%l4+16],%f2 407 408 faddd %f14,%f10,%f14 409 fmuld %f8,%f12,%f12 410 ldd [%l5+16],%f10 411 412 faddd %f22,%f18,%f22 413 fmuld %f16,%f20,%f20 414 ldd [%l6+16],%f18 415 416 faddd %f30,%f26,%f30 417 fmuld %f24,%f28,%f28 418 ldd [%l7+16],%f26 419 420 fmuld %f2,%f6,%f6 421 422 fmuld %f10,%f14,%f14 423 424 fmuld %f18,%f22,%f22 425 426 fmuld %f26,%f30,%f30 427 428 faddd %f6,%f4,%f6 429 430 faddd %f14,%f12,%f14 431 432 faddd %f22,%f20,%f22 433 434 faddd %f30,%f28,%f30 435 436 faddd %f6,%f0,%f6 437 438 faddd %f14,%f8,%f14 439 440 faddd %f22,%f16,%f22 441 442 faddd %f30,%f24,%f30 443 444 fnegd %f6,%f4 445 lda [%i1]%asi,%l0 ! 
preload next argument 446 447 fnegd %f14,%f12 448 lda [%i1]%asi,%f0 449 450 fnegd %f22,%f20 451 lda [%i1+4]%asi,%f3 452 453 fnegd %f30,%f28 454 andn %l0,%i5,%l0 455 add %i1,%i2,%i1 456 457 fmovdl %fcc0,%f4,%f6 ! (hx < -0)? -s : s 458 st %f6,[%o0] 459 460 fmovdl %fcc1,%f12,%f14 461 st %f14,[%o1] 462 463 fmovdl %fcc2,%f20,%f22 464 st %f22,[%o2] 465 466 fmovdl %fcc3,%f28,%f30 467 st %f30,[%o3] 468 addcc %i0,-1,%i0 469 470 bg,pt %icc,.loop0 471! delay slot 472 st %f7,[%o0+4] 473 474 ba,pt %icc,.end 475! delay slot 476 nop 477 478 479 .align 16 480.medium: 481 faddd %f6,c3two51,%f4 482 st %f5,[%fp+nk0] 483 484 faddd %f14,c3two51,%f12 485 st %f13,[%fp+nk1] 486 487 faddd %f22,c3two51,%f20 488 st %f21,[%fp+nk2] 489 490 faddd %f30,c3two51,%f28 491 st %f29,[%fp+nk3] 492 493 fsubd %f4,c3two51,%f6 494 495 fsubd %f12,c3two51,%f14 496 497 fsubd %f20,c3two51,%f22 498 499 fsubd %f28,c3two51,%f30 500 501 fmuld %f6,pio2_1,%f2 502 ld [%fp+nk0],%l0 ! n 503 504 fmuld %f14,pio2_1,%f10 505 ld [%fp+nk1],%l1 506 507 fmuld %f22,pio2_1,%f18 508 ld [%fp+nk2],%l2 509 510 fmuld %f30,pio2_1,%f26 511 ld [%fp+nk3],%l3 512 513 fsubd %f0,%f2,%f0 514 fmuld %f6,pio2_2,%f4 515 516 fsubd %f8,%f10,%f8 517 fmuld %f14,pio2_2,%f12 518 519 fsubd %f16,%f18,%f16 520 fmuld %f22,pio2_2,%f20 521 522 fsubd %f24,%f26,%f24 523 fmuld %f30,pio2_2,%f28 524 525 fsubd %f0,%f4,%f32 526 527 fsubd %f8,%f12,%f34 528 529 fsubd %f16,%f20,%f36 530 531 fsubd %f24,%f28,%f38 532 533 fsubd %f0,%f32,%f0 534 fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0 535 536 fsubd %f8,%f34,%f8 537 fcmple32 %f34,pio2_3,%l5 538 539 fsubd %f16,%f36,%f16 540 fcmple32 %f36,pio2_3,%l6 541 542 fsubd %f24,%f38,%f24 543 fcmple32 %f38,pio2_3,%l7 544 545 fsubd %f0,%f4,%f0 546 fmuld %f6,pio2_3,%f6 547 sll %l4,30,%l4 ! 
if (x < 0) n = -n ^ 2 548 549 fsubd %f8,%f12,%f8 550 fmuld %f14,pio2_3,%f14 551 sll %l5,30,%l5 552 553 fsubd %f16,%f20,%f16 554 fmuld %f22,pio2_3,%f22 555 sll %l6,30,%l6 556 557 fsubd %f24,%f28,%f24 558 fmuld %f30,pio2_3,%f30 559 sll %l7,30,%l7 560 561 fsubd %f6,%f0,%f6 562 sra %l4,31,%l4 563 564 fsubd %f14,%f8,%f14 565 sra %l5,31,%l5 566 567 fsubd %f22,%f16,%f22 568 sra %l6,31,%l6 569 570 fsubd %f30,%f24,%f30 571 sra %l7,31,%l7 572 573 fsubd %f32,%f6,%f0 ! reduced x 574 xor %l0,%l4,%l0 575 576 fsubd %f34,%f14,%f8 577 xor %l1,%l5,%l1 578 579 fsubd %f36,%f22,%f16 580 xor %l2,%l6,%l2 581 582 fsubd %f38,%f30,%f24 583 xor %l3,%l7,%l3 584 585 fabsd %f0,%f2 586 sub %l0,%l4,%l0 587 588 fabsd %f8,%f10 589 sub %l1,%l5,%l1 590 591 fabsd %f16,%f18 592 sub %l2,%l6,%l2 593 594 fabsd %f24,%f26 595 sub %l3,%l7,%l3 596 597 faddd %f2,c3two44,%f4 598 st %f5,[%fp+nk0] 599 and %l4,2,%l4 600 601 faddd %f10,c3two44,%f12 602 st %f13,[%fp+nk1] 603 and %l5,2,%l5 604 605 faddd %f18,c3two44,%f20 606 st %f21,[%fp+nk2] 607 and %l6,2,%l6 608 609 faddd %f26,c3two44,%f28 610 st %f29,[%fp+nk3] 611 and %l7,2,%l7 612 613 fsubd %f32,%f0,%f4 614 xor %l0,%l4,%l0 615 616 fsubd %f34,%f8,%f12 617 xor %l1,%l5,%l1 618 619 fsubd %f36,%f16,%f20 620 xor %l2,%l6,%l2 621 622 fsubd %f38,%f24,%f28 623 xor %l3,%l7,%l3 624 625 fzero %f38 626 ld [%fp+nk0],%l4 627 628 fsubd %f4,%f6,%f6 ! w 629 ld [%fp+nk1],%l5 630 631 fsubd %f12,%f14,%f14 632 ld [%fp+nk2],%l6 633 634 fnegd %f38,%f38 635 ld [%fp+nk3],%l7 636 sll %l4,5,%l4 ! k 637 638 fsubd %f20,%f22,%f22 639 sll %l5,5,%l5 640 641 fsubd %f28,%f30,%f30 642 sll %l6,5,%l6 643 644 fand %f0,%f38,%f32 ! sign bit of x 645 ldd [%l4+%g1],%f4 646 sll %l7,5,%l7 647 648 fand %f8,%f38,%f34 649 ldd [%l5+%g1],%f12 650 651 fand %f16,%f38,%f36 652 ldd [%l6+%g1],%f20 653 654 fand %f24,%f38,%f38 655 ldd [%l7+%g1],%f28 656 657 fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] 658 659 fsubd %f10,%f12,%f10 660 661 fsubd %f18,%f20,%f18 662 nop 663 664 fsubd %f26,%f28,%f26 665 nop 666 667! 
16-byte aligned 668 fmuld %f2,%f2,%f0 ! z = x * x 669 andcc %l0,1,%g0 670 bz,pn %icc,.case8 671! delay slot 672 fxor %f6,%f32,%f32 673 674 fmuld %f10,%f10,%f8 675 andcc %l1,1,%g0 676 bz,pn %icc,.case4 677! delay slot 678 fxor %f14,%f34,%f34 679 680 fmuld %f18,%f18,%f16 681 andcc %l2,1,%g0 682 bz,pn %icc,.case2 683! delay slot 684 fxor %f22,%f36,%f36 685 686 fmuld %f26,%f26,%f24 687 andcc %l3,1,%g0 688 bz,pn %icc,.case1 689! delay slot 690 fxor %f30,%f38,%f38 691 692!.case0: 693 fmuld %f0,qq3,%f6 ! cos(x0) 694 695 fmuld %f8,qq3,%f14 ! cos(x1) 696 697 fmuld %f16,qq3,%f22 ! cos(x2) 698 699 fmuld %f24,qq3,%f30 ! cos(x3) 700 701 faddd %f6,qq2,%f6 702 fmuld %f0,pp2,%f4 703 704 faddd %f14,qq2,%f14 705 fmuld %f8,pp2,%f12 706 707 faddd %f22,qq2,%f22 708 fmuld %f16,pp2,%f20 709 710 faddd %f30,qq2,%f30 711 fmuld %f24,pp2,%f28 712 713 fmuld %f0,%f6,%f6 714 faddd %f4,pp1,%f4 715 716 fmuld %f8,%f14,%f14 717 faddd %f12,pp1,%f12 718 719 fmuld %f16,%f22,%f22 720 faddd %f20,pp1,%f20 721 722 fmuld %f24,%f30,%f30 723 faddd %f28,pp1,%f28 724 725 faddd %f6,qq1,%f6 726 fmuld %f0,%f4,%f4 727 add %l4,%g1,%l4 728 729 faddd %f14,qq1,%f14 730 fmuld %f8,%f12,%f12 731 add %l5,%g1,%l5 732 733 faddd %f22,qq1,%f22 734 fmuld %f16,%f20,%f20 735 add %l6,%g1,%l6 736 737 faddd %f30,qq1,%f30 738 fmuld %f24,%f28,%f28 739 add %l7,%g1,%l7 740 741 fmuld %f2,%f4,%f4 742 743 fmuld %f10,%f12,%f12 744 745 fmuld %f18,%f20,%f20 746 747 fmuld %f26,%f28,%f28 748 749 fmuld %f0,%f6,%f6 750 faddd %f4,%f32,%f4 751 ldd [%l4+16],%f0 752 753 fmuld %f8,%f14,%f14 754 faddd %f12,%f34,%f12 755 ldd [%l5+16],%f8 756 757 fmuld %f16,%f22,%f22 758 faddd %f20,%f36,%f20 759 ldd [%l6+16],%f16 760 761 fmuld %f24,%f30,%f30 762 faddd %f28,%f38,%f28 763 ldd [%l7+16],%f24 764 765 fmuld %f0,%f6,%f6 766 faddd %f4,%f2,%f4 767 ldd [%l4+8],%f32 768 769 fmuld %f8,%f14,%f14 770 faddd %f12,%f10,%f12 771 ldd [%l5+8],%f34 772 773 fmuld %f16,%f22,%f22 774 faddd %f20,%f18,%f20 775 ldd [%l6+8],%f36 776 777 fmuld %f24,%f30,%f30 778 faddd %f28,%f26,%f28 
779 ldd [%l7+8],%f38 780 781 fmuld %f32,%f4,%f4 782 783 fmuld %f34,%f12,%f12 784 785 fmuld %f36,%f20,%f20 786 787 fmuld %f38,%f28,%f28 788 789 fsubd %f6,%f4,%f6 790 791 fsubd %f14,%f12,%f14 792 793 fsubd %f22,%f20,%f22 794 795 fsubd %f30,%f28,%f30 796 797 faddd %f6,%f0,%f6 798 799 faddd %f14,%f8,%f14 800 801 faddd %f22,%f16,%f22 802 803 faddd %f30,%f24,%f30 804 mov %l0,%l4 805 806 fnegd %f6,%f4 807 lda [%i1]%asi,%l0 ! preload next argument 808 809 fnegd %f14,%f12 810 lda [%i1]%asi,%f0 811 812 fnegd %f22,%f20 813 lda [%i1+4]%asi,%f3 814 815 fnegd %f30,%f28 816 andn %l0,%i5,%l0 817 add %i1,%i2,%i1 818 819 andcc %l4,2,%g0 820 fmovdnz %icc,%f4,%f6 821 st %f6,[%o0] 822 823 andcc %l1,2,%g0 824 fmovdnz %icc,%f12,%f14 825 st %f14,[%o1] 826 827 andcc %l2,2,%g0 828 fmovdnz %icc,%f20,%f22 829 st %f22,[%o2] 830 831 andcc %l3,2,%g0 832 fmovdnz %icc,%f28,%f30 833 st %f30,[%o3] 834 835 addcc %i0,-1,%i0 836 bg,pt %icc,.loop0 837! delay slot 838 st %f7,[%o0+4] 839 840 ba,pt %icc,.end 841! delay slot 842 nop 843 844 .align 16 845.case1: 846 fmuld %f24,pp3,%f30 ! sin(x3) 847 848 fmuld %f0,qq3,%f6 ! cos(x0) 849 850 fmuld %f8,qq3,%f14 ! cos(x1) 851 852 fmuld %f16,qq3,%f22 ! 
cos(x2) 853 854 faddd %f30,pp2,%f30 855 fmuld %f24,qq2,%f28 856 857 faddd %f6,qq2,%f6 858 fmuld %f0,pp2,%f4 859 860 faddd %f14,qq2,%f14 861 fmuld %f8,pp2,%f12 862 863 faddd %f22,qq2,%f22 864 fmuld %f16,pp2,%f20 865 866 fmuld %f24,%f30,%f30 867 faddd %f28,qq1,%f28 868 869 fmuld %f0,%f6,%f6 870 faddd %f4,pp1,%f4 871 872 fmuld %f8,%f14,%f14 873 faddd %f12,pp1,%f12 874 875 fmuld %f16,%f22,%f22 876 faddd %f20,pp1,%f20 877 878 faddd %f30,pp1,%f30 879 fmuld %f24,%f28,%f28 880 add %l7,%g1,%l7 881 882 faddd %f6,qq1,%f6 883 fmuld %f0,%f4,%f4 884 add %l4,%g1,%l4 885 886 faddd %f14,qq1,%f14 887 fmuld %f8,%f12,%f12 888 add %l5,%g1,%l5 889 890 faddd %f22,qq1,%f22 891 fmuld %f16,%f20,%f20 892 add %l6,%g1,%l6 893 894 fmuld %f24,%f30,%f30 895 896 fmuld %f2,%f4,%f4 897 898 fmuld %f10,%f12,%f12 899 900 fmuld %f18,%f20,%f20 901 902 fmuld %f26,%f30,%f30 903 ldd [%l7+8],%f24 904 905 fmuld %f0,%f6,%f6 906 faddd %f4,%f32,%f4 907 ldd [%l4+16],%f0 908 909 fmuld %f8,%f14,%f14 910 faddd %f12,%f34,%f12 911 ldd [%l5+16],%f8 912 913 fmuld %f16,%f22,%f22 914 faddd %f20,%f36,%f20 915 ldd [%l6+16],%f16 916 917 fmuld %f24,%f28,%f28 918 faddd %f38,%f30,%f30 919 920 fmuld %f0,%f6,%f6 921 faddd %f4,%f2,%f4 922 ldd [%l4+8],%f32 923 924 fmuld %f8,%f14,%f14 925 faddd %f12,%f10,%f12 926 ldd [%l5+8],%f34 927 928 fmuld %f16,%f22,%f22 929 faddd %f20,%f18,%f20 930 ldd [%l6+8],%f36 931 932 faddd %f26,%f30,%f30 933 ldd [%l7+16],%f38 934 935 fmuld %f32,%f4,%f4 936 937 fmuld %f34,%f12,%f12 938 939 fmuld %f36,%f20,%f20 940 941 fmuld %f38,%f30,%f30 942 943 fsubd %f6,%f4,%f6 944 945 fsubd %f14,%f12,%f14 946 947 fsubd %f22,%f20,%f22 948 949 faddd %f30,%f28,%f30 950 951 faddd %f6,%f0,%f6 952 953 faddd %f14,%f8,%f14 954 955 faddd %f22,%f16,%f22 956 957 faddd %f30,%f24,%f30 958 mov %l0,%l4 959 960 fnegd %f6,%f4 961 lda [%i1]%asi,%l0 ! 
preload next argument 962 963 fnegd %f14,%f12 964 lda [%i1]%asi,%f0 965 966 fnegd %f22,%f20 967 lda [%i1+4]%asi,%f3 968 969 fnegd %f30,%f28 970 andn %l0,%i5,%l0 971 add %i1,%i2,%i1 972 973 andcc %l4,2,%g0 974 fmovdnz %icc,%f4,%f6 975 st %f6,[%o0] 976 977 andcc %l1,2,%g0 978 fmovdnz %icc,%f12,%f14 979 st %f14,[%o1] 980 981 andcc %l2,2,%g0 982 fmovdnz %icc,%f20,%f22 983 st %f22,[%o2] 984 985 andcc %l3,2,%g0 986 fmovdnz %icc,%f28,%f30 987 st %f30,[%o3] 988 989 addcc %i0,-1,%i0 990 bg,pt %icc,.loop0 991! delay slot 992 st %f7,[%o0+4] 993 994 ba,pt %icc,.end 995! delay slot 996 nop 997 998 .align 16 999.case2: 1000 fmuld %f26,%f26,%f24 1001 andcc %l3,1,%g0 1002 bz,pn %icc,.case3 1003! delay slot 1004 fxor %f30,%f38,%f38 1005 1006 fmuld %f16,pp3,%f22 ! sin(x2) 1007 1008 fmuld %f0,qq3,%f6 ! cos(x0) 1009 1010 fmuld %f8,qq3,%f14 ! cos(x1) 1011 1012 faddd %f22,pp2,%f22 1013 fmuld %f16,qq2,%f20 1014 1015 fmuld %f24,qq3,%f30 ! cos(x3) 1016 1017 faddd %f6,qq2,%f6 1018 fmuld %f0,pp2,%f4 1019 1020 faddd %f14,qq2,%f14 1021 fmuld %f8,pp2,%f12 1022 1023 fmuld %f16,%f22,%f22 1024 faddd %f20,qq1,%f20 1025 1026 faddd %f30,qq2,%f30 1027 fmuld %f24,pp2,%f28 1028 1029 fmuld %f0,%f6,%f6 1030 faddd %f4,pp1,%f4 1031 1032 fmuld %f8,%f14,%f14 1033 faddd %f12,pp1,%f12 1034 1035 faddd %f22,pp1,%f22 1036 fmuld %f16,%f20,%f20 1037 add %l6,%g1,%l6 1038 1039 fmuld %f24,%f30,%f30 1040 faddd %f28,pp1,%f28 1041 1042 faddd %f6,qq1,%f6 1043 fmuld %f0,%f4,%f4 1044 add %l4,%g1,%l4 1045 1046 faddd %f14,qq1,%f14 1047 fmuld %f8,%f12,%f12 1048 add %l5,%g1,%l5 1049 1050 fmuld %f16,%f22,%f22 1051 1052 faddd %f30,qq1,%f30 1053 fmuld %f24,%f28,%f28 1054 add %l7,%g1,%l7 1055 1056 fmuld %f2,%f4,%f4 1057 1058 fmuld %f10,%f12,%f12 1059 1060 fmuld %f18,%f22,%f22 1061 ldd [%l6+8],%f16 1062 1063 fmuld %f26,%f28,%f28 1064 1065 fmuld %f0,%f6,%f6 1066 faddd %f4,%f32,%f4 1067 ldd [%l4+16],%f0 1068 1069 fmuld %f8,%f14,%f14 1070 faddd %f12,%f34,%f12 1071 ldd [%l5+16],%f8 1072 1073 fmuld %f16,%f20,%f20 1074 faddd %f36,%f22,%f22 
1075 1076 fmuld %f24,%f30,%f30 1077 faddd %f28,%f38,%f28 1078 ldd [%l7+16],%f24 1079 1080 fmuld %f0,%f6,%f6 1081 faddd %f4,%f2,%f4 1082 ldd [%l4+8],%f32 1083 1084 fmuld %f8,%f14,%f14 1085 faddd %f12,%f10,%f12 1086 ldd [%l5+8],%f34 1087 1088 faddd %f18,%f22,%f22 1089 ldd [%l6+16],%f36 1090 1091 fmuld %f24,%f30,%f30 1092 faddd %f28,%f26,%f28 1093 ldd [%l7+8],%f38 1094 1095 fmuld %f32,%f4,%f4 1096 1097 fmuld %f34,%f12,%f12 1098 1099 fmuld %f36,%f22,%f22 1100 1101 fmuld %f38,%f28,%f28 1102 1103 fsubd %f6,%f4,%f6 1104 1105 fsubd %f14,%f12,%f14 1106 1107 faddd %f22,%f20,%f22 1108 1109 fsubd %f30,%f28,%f30 1110 1111 faddd %f6,%f0,%f6 1112 1113 faddd %f14,%f8,%f14 1114 1115 faddd %f22,%f16,%f22 1116 1117 faddd %f30,%f24,%f30 1118 mov %l0,%l4 1119 1120 fnegd %f6,%f4 1121 lda [%i1]%asi,%l0 ! preload next argument 1122 1123 fnegd %f14,%f12 1124 lda [%i1]%asi,%f0 1125 1126 fnegd %f22,%f20 1127 lda [%i1+4]%asi,%f3 1128 1129 fnegd %f30,%f28 1130 andn %l0,%i5,%l0 1131 add %i1,%i2,%i1 1132 1133 andcc %l4,2,%g0 1134 fmovdnz %icc,%f4,%f6 1135 st %f6,[%o0] 1136 1137 andcc %l1,2,%g0 1138 fmovdnz %icc,%f12,%f14 1139 st %f14,[%o1] 1140 1141 andcc %l2,2,%g0 1142 fmovdnz %icc,%f20,%f22 1143 st %f22,[%o2] 1144 1145 andcc %l3,2,%g0 1146 fmovdnz %icc,%f28,%f30 1147 st %f30,[%o3] 1148 1149 addcc %i0,-1,%i0 1150 bg,pt %icc,.loop0 1151! delay slot 1152 st %f7,[%o0+4] 1153 1154 ba,pt %icc,.end 1155! delay slot 1156 nop 1157 1158 .align 16 1159.case3: 1160 fmuld %f16,pp3,%f22 ! sin(x2) 1161 1162 fmuld %f24,pp3,%f30 ! sin(x3) 1163 1164 fmuld %f0,qq3,%f6 ! cos(x0) 1165 1166 fmuld %f8,qq3,%f14 ! 
cos(x1) 1167 1168 faddd %f22,pp2,%f22 1169 fmuld %f16,qq2,%f20 1170 1171 faddd %f30,pp2,%f30 1172 fmuld %f24,qq2,%f28 1173 1174 faddd %f6,qq2,%f6 1175 fmuld %f0,pp2,%f4 1176 1177 faddd %f14,qq2,%f14 1178 fmuld %f8,pp2,%f12 1179 1180 fmuld %f16,%f22,%f22 1181 faddd %f20,qq1,%f20 1182 1183 fmuld %f24,%f30,%f30 1184 faddd %f28,qq1,%f28 1185 1186 fmuld %f0,%f6,%f6 1187 faddd %f4,pp1,%f4 1188 1189 fmuld %f8,%f14,%f14 1190 faddd %f12,pp1,%f12 1191 1192 faddd %f22,pp1,%f22 1193 fmuld %f16,%f20,%f20 1194 add %l6,%g1,%l6 1195 1196 faddd %f30,pp1,%f30 1197 fmuld %f24,%f28,%f28 1198 add %l7,%g1,%l7 1199 1200 faddd %f6,qq1,%f6 1201 fmuld %f0,%f4,%f4 1202 add %l4,%g1,%l4 1203 1204 faddd %f14,qq1,%f14 1205 fmuld %f8,%f12,%f12 1206 add %l5,%g1,%l5 1207 1208 fmuld %f16,%f22,%f22 1209 1210 fmuld %f24,%f30,%f30 1211 1212 fmuld %f2,%f4,%f4 1213 1214 fmuld %f10,%f12,%f12 1215 1216 fmuld %f18,%f22,%f22 1217 ldd [%l6+8],%f16 1218 1219 fmuld %f26,%f30,%f30 1220 ldd [%l7+8],%f24 1221 1222 fmuld %f0,%f6,%f6 1223 faddd %f4,%f32,%f4 1224 ldd [%l4+16],%f0 1225 1226 fmuld %f8,%f14,%f14 1227 faddd %f12,%f34,%f12 1228 ldd [%l5+16],%f8 1229 1230 fmuld %f16,%f20,%f20 1231 faddd %f36,%f22,%f22 1232 1233 fmuld %f24,%f28,%f28 1234 faddd %f38,%f30,%f30 1235 1236 fmuld %f0,%f6,%f6 1237 faddd %f4,%f2,%f4 1238 ldd [%l4+8],%f32 1239 1240 fmuld %f8,%f14,%f14 1241 faddd %f12,%f10,%f12 1242 ldd [%l5+8],%f34 1243 1244 faddd %f18,%f22,%f22 1245 ldd [%l6+16],%f36 1246 1247 faddd %f26,%f30,%f30 1248 ldd [%l7+16],%f38 1249 1250 fmuld %f32,%f4,%f4 1251 1252 fmuld %f34,%f12,%f12 1253 1254 fmuld %f36,%f22,%f22 1255 1256 fmuld %f38,%f30,%f30 1257 1258 fsubd %f6,%f4,%f6 1259 1260 fsubd %f14,%f12,%f14 1261 1262 faddd %f22,%f20,%f22 1263 1264 faddd %f30,%f28,%f30 1265 1266 faddd %f6,%f0,%f6 1267 1268 faddd %f14,%f8,%f14 1269 1270 faddd %f22,%f16,%f22 1271 1272 faddd %f30,%f24,%f30 1273 mov %l0,%l4 1274 1275 fnegd %f6,%f4 1276 lda [%i1]%asi,%l0 ! 
preload next argument 1277 1278 fnegd %f14,%f12 1279 lda [%i1]%asi,%f0 1280 1281 fnegd %f22,%f20 1282 lda [%i1+4]%asi,%f3 1283 1284 fnegd %f30,%f28 1285 andn %l0,%i5,%l0 1286 add %i1,%i2,%i1 1287 1288 andcc %l4,2,%g0 1289 fmovdnz %icc,%f4,%f6 1290 st %f6,[%o0] 1291 1292 andcc %l1,2,%g0 1293 fmovdnz %icc,%f12,%f14 1294 st %f14,[%o1] 1295 1296 andcc %l2,2,%g0 1297 fmovdnz %icc,%f20,%f22 1298 st %f22,[%o2] 1299 1300 andcc %l3,2,%g0 1301 fmovdnz %icc,%f28,%f30 1302 st %f30,[%o3] 1303 1304 addcc %i0,-1,%i0 1305 bg,pt %icc,.loop0 1306! delay slot 1307 st %f7,[%o0+4] 1308 1309 ba,pt %icc,.end 1310! delay slot 1311 nop 1312 1313 .align 16 1314.case4: 1315 fmuld %f18,%f18,%f16 1316 andcc %l2,1,%g0 1317 bz,pn %icc,.case6 1318! delay slot 1319 fxor %f22,%f36,%f36 1320 1321 fmuld %f26,%f26,%f24 1322 andcc %l3,1,%g0 1323 bz,pn %icc,.case5 1324! delay slot 1325 fxor %f30,%f38,%f38 1326 1327 fmuld %f8,pp3,%f14 ! sin(x1) 1328 1329 fmuld %f0,qq3,%f6 ! cos(x0) 1330 1331 faddd %f14,pp2,%f14 1332 fmuld %f8,qq2,%f12 1333 1334 fmuld %f16,qq3,%f22 ! cos(x2) 1335 1336 fmuld %f24,qq3,%f30 ! 
cos(x3) 1337 1338 faddd %f6,qq2,%f6 1339 fmuld %f0,pp2,%f4 1340 1341 fmuld %f8,%f14,%f14 1342 faddd %f12,qq1,%f12 1343 1344 faddd %f22,qq2,%f22 1345 fmuld %f16,pp2,%f20 1346 1347 faddd %f30,qq2,%f30 1348 fmuld %f24,pp2,%f28 1349 1350 fmuld %f0,%f6,%f6 1351 faddd %f4,pp1,%f4 1352 1353 faddd %f14,pp1,%f14 1354 fmuld %f8,%f12,%f12 1355 add %l5,%g1,%l5 1356 1357 fmuld %f16,%f22,%f22 1358 faddd %f20,pp1,%f20 1359 1360 fmuld %f24,%f30,%f30 1361 faddd %f28,pp1,%f28 1362 1363 faddd %f6,qq1,%f6 1364 fmuld %f0,%f4,%f4 1365 add %l4,%g1,%l4 1366 1367 fmuld %f8,%f14,%f14 1368 1369 faddd %f22,qq1,%f22 1370 fmuld %f16,%f20,%f20 1371 add %l6,%g1,%l6 1372 1373 faddd %f30,qq1,%f30 1374 fmuld %f24,%f28,%f28 1375 add %l7,%g1,%l7 1376 1377 fmuld %f2,%f4,%f4 1378 1379 fmuld %f10,%f14,%f14 1380 ldd [%l5+8],%f8 1381 1382 fmuld %f18,%f20,%f20 1383 1384 fmuld %f26,%f28,%f28 1385 1386 fmuld %f0,%f6,%f6 1387 faddd %f4,%f32,%f4 1388 ldd [%l4+16],%f0 1389 1390 fmuld %f8,%f12,%f12 1391 faddd %f34,%f14,%f14 1392 1393 fmuld %f16,%f22,%f22 1394 faddd %f20,%f36,%f20 1395 ldd [%l6+16],%f16 1396 1397 fmuld %f24,%f30,%f30 1398 faddd %f28,%f38,%f28 1399 ldd [%l7+16],%f24 1400 1401 fmuld %f0,%f6,%f6 1402 faddd %f4,%f2,%f4 1403 ldd [%l4+8],%f32 1404 1405 faddd %f10,%f14,%f14 1406 ldd [%l5+16],%f34 1407 1408 fmuld %f16,%f22,%f22 1409 faddd %f20,%f18,%f20 1410 ldd [%l6+8],%f36 1411 1412 fmuld %f24,%f30,%f30 1413 faddd %f28,%f26,%f28 1414 ldd [%l7+8],%f38 1415 1416 fmuld %f32,%f4,%f4 1417 1418 fmuld %f34,%f14,%f14 1419 1420 fmuld %f36,%f20,%f20 1421 1422 fmuld %f38,%f28,%f28 1423 1424 fsubd %f6,%f4,%f6 1425 1426 faddd %f14,%f12,%f14 1427 1428 fsubd %f22,%f20,%f22 1429 1430 fsubd %f30,%f28,%f30 1431 1432 faddd %f6,%f0,%f6 1433 1434 faddd %f14,%f8,%f14 1435 1436 faddd %f22,%f16,%f22 1437 1438 faddd %f30,%f24,%f30 1439 mov %l0,%l4 1440 1441 fnegd %f6,%f4 1442 lda [%i1]%asi,%l0 ! 
preload next argument 1443 1444 fnegd %f14,%f12 1445 lda [%i1]%asi,%f0 1446 1447 fnegd %f22,%f20 1448 lda [%i1+4]%asi,%f3 1449 1450 fnegd %f30,%f28 1451 andn %l0,%i5,%l0 1452 add %i1,%i2,%i1 1453 1454 andcc %l4,2,%g0 1455 fmovdnz %icc,%f4,%f6 1456 st %f6,[%o0] 1457 1458 andcc %l1,2,%g0 1459 fmovdnz %icc,%f12,%f14 1460 st %f14,[%o1] 1461 1462 andcc %l2,2,%g0 1463 fmovdnz %icc,%f20,%f22 1464 st %f22,[%o2] 1465 1466 andcc %l3,2,%g0 1467 fmovdnz %icc,%f28,%f30 1468 st %f30,[%o3] 1469 1470 addcc %i0,-1,%i0 1471 bg,pt %icc,.loop0 1472! delay slot 1473 st %f7,[%o0+4] 1474 1475 ba,pt %icc,.end 1476! delay slot 1477 nop 1478 1479 .align 16 1480.case5: 1481 fmuld %f8,pp3,%f14 ! sin(x1) 1482 1483 fmuld %f24,pp3,%f30 ! sin(x3) 1484 1485 fmuld %f0,qq3,%f6 ! cos(x0) 1486 1487 faddd %f14,pp2,%f14 1488 fmuld %f8,qq2,%f12 1489 1490 fmuld %f16,qq3,%f22 ! cos(x2) 1491 1492 faddd %f30,pp2,%f30 1493 fmuld %f24,qq2,%f28 1494 1495 faddd %f6,qq2,%f6 1496 fmuld %f0,pp2,%f4 1497 1498 fmuld %f8,%f14,%f14 1499 faddd %f12,qq1,%f12 1500 1501 faddd %f22,qq2,%f22 1502 fmuld %f16,pp2,%f20 1503 1504 fmuld %f24,%f30,%f30 1505 faddd %f28,qq1,%f28 1506 1507 fmuld %f0,%f6,%f6 1508 faddd %f4,pp1,%f4 1509 1510 faddd %f14,pp1,%f14 1511 fmuld %f8,%f12,%f12 1512 add %l5,%g1,%l5 1513 1514 fmuld %f16,%f22,%f22 1515 faddd %f20,pp1,%f20 1516 1517 faddd %f30,pp1,%f30 1518 fmuld %f24,%f28,%f28 1519 add %l7,%g1,%l7 1520 1521 faddd %f6,qq1,%f6 1522 fmuld %f0,%f4,%f4 1523 add %l4,%g1,%l4 1524 1525 fmuld %f8,%f14,%f14 1526 1527 faddd %f22,qq1,%f22 1528 fmuld %f16,%f20,%f20 1529 add %l6,%g1,%l6 1530 1531 fmuld %f24,%f30,%f30 1532 1533 fmuld %f2,%f4,%f4 1534 1535 fmuld %f10,%f14,%f14 1536 ldd [%l5+8],%f8 1537 1538 fmuld %f18,%f20,%f20 1539 1540 fmuld %f26,%f30,%f30 1541 ldd [%l7+8],%f24 1542 1543 fmuld %f0,%f6,%f6 1544 faddd %f4,%f32,%f4 1545 ldd [%l4+16],%f0 1546 1547 fmuld %f8,%f12,%f12 1548 faddd %f34,%f14,%f14 1549 1550 fmuld %f16,%f22,%f22 1551 faddd %f20,%f36,%f20 1552 ldd [%l6+16],%f16 1553 1554 fmuld 
%f24,%f28,%f28 1555 faddd %f38,%f30,%f30 1556 1557 fmuld %f0,%f6,%f6 1558 faddd %f4,%f2,%f4 1559 ldd [%l4+8],%f32 1560 1561 faddd %f10,%f14,%f14 1562 ldd [%l5+16],%f34 1563 1564 fmuld %f16,%f22,%f22 1565 faddd %f20,%f18,%f20 1566 ldd [%l6+8],%f36 1567 1568 faddd %f26,%f30,%f30 1569 ldd [%l7+16],%f38 1570 1571 fmuld %f32,%f4,%f4 1572 1573 fmuld %f34,%f14,%f14 1574 1575 fmuld %f36,%f20,%f20 1576 1577 fmuld %f38,%f30,%f30 1578 1579 fsubd %f6,%f4,%f6 1580 1581 faddd %f14,%f12,%f14 1582 1583 fsubd %f22,%f20,%f22 1584 1585 faddd %f30,%f28,%f30 1586 1587 faddd %f6,%f0,%f6 1588 1589 faddd %f14,%f8,%f14 1590 1591 faddd %f22,%f16,%f22 1592 1593 faddd %f30,%f24,%f30 1594 mov %l0,%l4 1595 1596 fnegd %f6,%f4 1597 lda [%i1]%asi,%l0 ! preload next argument 1598 1599 fnegd %f14,%f12 1600 lda [%i1]%asi,%f0 1601 1602 fnegd %f22,%f20 1603 lda [%i1+4]%asi,%f3 1604 1605 fnegd %f30,%f28 1606 andn %l0,%i5,%l0 1607 add %i1,%i2,%i1 1608 1609 andcc %l4,2,%g0 1610 fmovdnz %icc,%f4,%f6 1611 st %f6,[%o0] 1612 1613 andcc %l1,2,%g0 1614 fmovdnz %icc,%f12,%f14 1615 st %f14,[%o1] 1616 1617 andcc %l2,2,%g0 1618 fmovdnz %icc,%f20,%f22 1619 st %f22,[%o2] 1620 1621 andcc %l3,2,%g0 1622 fmovdnz %icc,%f28,%f30 1623 st %f30,[%o3] 1624 1625 addcc %i0,-1,%i0 1626 bg,pt %icc,.loop0 1627! delay slot 1628 st %f7,[%o0+4] 1629 1630 ba,pt %icc,.end 1631! delay slot 1632 nop 1633 1634 .align 16 1635.case6: 1636 fmuld %f26,%f26,%f24 1637 andcc %l3,1,%g0 1638 bz,pn %icc,.case7 1639! delay slot 1640 fxor %f30,%f38,%f38 1641 1642 fmuld %f8,pp3,%f14 ! sin(x1) 1643 1644 fmuld %f16,pp3,%f22 ! sin(x2) 1645 1646 fmuld %f0,qq3,%f6 ! cos(x0) 1647 1648 faddd %f14,pp2,%f14 1649 fmuld %f8,qq2,%f12 1650 1651 faddd %f22,pp2,%f22 1652 fmuld %f16,qq2,%f20 1653 1654 fmuld %f24,qq3,%f30 ! 
cos(x3) 1655 1656 faddd %f6,qq2,%f6 1657 fmuld %f0,pp2,%f4 1658 1659 fmuld %f8,%f14,%f14 1660 faddd %f12,qq1,%f12 1661 1662 fmuld %f16,%f22,%f22 1663 faddd %f20,qq1,%f20 1664 1665 faddd %f30,qq2,%f30 1666 fmuld %f24,pp2,%f28 1667 1668 fmuld %f0,%f6,%f6 1669 faddd %f4,pp1,%f4 1670 1671 faddd %f14,pp1,%f14 1672 fmuld %f8,%f12,%f12 1673 add %l5,%g1,%l5 1674 1675 faddd %f22,pp1,%f22 1676 fmuld %f16,%f20,%f20 1677 add %l6,%g1,%l6 1678 1679 fmuld %f24,%f30,%f30 1680 faddd %f28,pp1,%f28 1681 1682 faddd %f6,qq1,%f6 1683 fmuld %f0,%f4,%f4 1684 add %l4,%g1,%l4 1685 1686 fmuld %f8,%f14,%f14 1687 1688 fmuld %f16,%f22,%f22 1689 1690 faddd %f30,qq1,%f30 1691 fmuld %f24,%f28,%f28 1692 add %l7,%g1,%l7 1693 1694 fmuld %f2,%f4,%f4 1695 1696 fmuld %f10,%f14,%f14 1697 ldd [%l5+8],%f8 1698 1699 fmuld %f18,%f22,%f22 1700 ldd [%l6+8],%f16 1701 1702 fmuld %f26,%f28,%f28 1703 1704 fmuld %f0,%f6,%f6 1705 faddd %f4,%f32,%f4 1706 ldd [%l4+16],%f0 1707 1708 fmuld %f8,%f12,%f12 1709 faddd %f34,%f14,%f14 1710 1711 fmuld %f16,%f20,%f20 1712 faddd %f36,%f22,%f22 1713 1714 fmuld %f24,%f30,%f30 1715 faddd %f28,%f38,%f28 1716 ldd [%l7+16],%f24 1717 1718 fmuld %f0,%f6,%f6 1719 faddd %f4,%f2,%f4 1720 ldd [%l4+8],%f32 1721 1722 faddd %f10,%f14,%f14 1723 ldd [%l5+16],%f34 1724 1725 faddd %f18,%f22,%f22 1726 ldd [%l6+16],%f36 1727 1728 fmuld %f24,%f30,%f30 1729 faddd %f28,%f26,%f28 1730 ldd [%l7+8],%f38 1731 1732 fmuld %f32,%f4,%f4 1733 1734 fmuld %f34,%f14,%f14 1735 1736 fmuld %f36,%f22,%f22 1737 1738 fmuld %f38,%f28,%f28 1739 1740 fsubd %f6,%f4,%f6 1741 1742 faddd %f14,%f12,%f14 1743 1744 faddd %f22,%f20,%f22 1745 1746 fsubd %f30,%f28,%f30 1747 1748 faddd %f6,%f0,%f6 1749 1750 faddd %f14,%f8,%f14 1751 1752 faddd %f22,%f16,%f22 1753 1754 faddd %f30,%f24,%f30 1755 mov %l0,%l4 1756 1757 fnegd %f6,%f4 1758 lda [%i1]%asi,%l0 ! 
preload next argument 1759 1760 fnegd %f14,%f12 1761 lda [%i1]%asi,%f0 1762 1763 fnegd %f22,%f20 1764 lda [%i1+4]%asi,%f3 1765 1766 fnegd %f30,%f28 1767 andn %l0,%i5,%l0 1768 add %i1,%i2,%i1 1769 1770 andcc %l4,2,%g0 1771 fmovdnz %icc,%f4,%f6 1772 st %f6,[%o0] 1773 1774 andcc %l1,2,%g0 1775 fmovdnz %icc,%f12,%f14 1776 st %f14,[%o1] 1777 1778 andcc %l2,2,%g0 1779 fmovdnz %icc,%f20,%f22 1780 st %f22,[%o2] 1781 1782 andcc %l3,2,%g0 1783 fmovdnz %icc,%f28,%f30 1784 st %f30,[%o3] 1785 1786 addcc %i0,-1,%i0 1787 bg,pt %icc,.loop0 1788! delay slot 1789 st %f7,[%o0+4] 1790 1791 ba,pt %icc,.end 1792! delay slot 1793 nop 1794 1795 .align 16 1796.case7: 1797 fmuld %f8,pp3,%f14 ! sin(x1) 1798 1799 fmuld %f16,pp3,%f22 ! sin(x2) 1800 1801 fmuld %f24,pp3,%f30 ! sin(x3) 1802 1803 fmuld %f0,qq3,%f6 ! cos(x0) 1804 1805 faddd %f14,pp2,%f14 1806 fmuld %f8,qq2,%f12 1807 1808 faddd %f22,pp2,%f22 1809 fmuld %f16,qq2,%f20 1810 1811 faddd %f30,pp2,%f30 1812 fmuld %f24,qq2,%f28 1813 1814 faddd %f6,qq2,%f6 1815 fmuld %f0,pp2,%f4 1816 1817 fmuld %f8,%f14,%f14 1818 faddd %f12,qq1,%f12 1819 1820 fmuld %f16,%f22,%f22 1821 faddd %f20,qq1,%f20 1822 1823 fmuld %f24,%f30,%f30 1824 faddd %f28,qq1,%f28 1825 1826 fmuld %f0,%f6,%f6 1827 faddd %f4,pp1,%f4 1828 1829 faddd %f14,pp1,%f14 1830 fmuld %f8,%f12,%f12 1831 add %l5,%g1,%l5 1832 1833 faddd %f22,pp1,%f22 1834 fmuld %f16,%f20,%f20 1835 add %l6,%g1,%l6 1836 1837 faddd %f30,pp1,%f30 1838 fmuld %f24,%f28,%f28 1839 add %l7,%g1,%l7 1840 1841 faddd %f6,qq1,%f6 1842 fmuld %f0,%f4,%f4 1843 add %l4,%g1,%l4 1844 1845 fmuld %f8,%f14,%f14 1846 1847 fmuld %f16,%f22,%f22 1848 1849 fmuld %f24,%f30,%f30 1850 1851 fmuld %f2,%f4,%f4 1852 1853 fmuld %f10,%f14,%f14 1854 ldd [%l5+8],%f8 1855 1856 fmuld %f18,%f22,%f22 1857 ldd [%l6+8],%f16 1858 1859 fmuld %f26,%f30,%f30 1860 ldd [%l7+8],%f24 1861 1862 fmuld %f0,%f6,%f6 1863 faddd %f4,%f32,%f4 1864 ldd [%l4+16],%f0 1865 1866 fmuld %f8,%f12,%f12 1867 faddd %f34,%f14,%f14 1868 1869 fmuld %f16,%f20,%f20 1870 faddd 
%f36,%f22,%f22 1871 1872 fmuld %f24,%f28,%f28 1873 faddd %f38,%f30,%f30 1874 1875 fmuld %f0,%f6,%f6 1876 faddd %f4,%f2,%f4 1877 ldd [%l4+8],%f32 1878 1879 faddd %f10,%f14,%f14 1880 ldd [%l5+16],%f34 1881 1882 faddd %f18,%f22,%f22 1883 ldd [%l6+16],%f36 1884 1885 faddd %f26,%f30,%f30 1886 ldd [%l7+16],%f38 1887 1888 fmuld %f32,%f4,%f4 1889 1890 fmuld %f34,%f14,%f14 1891 1892 fmuld %f36,%f22,%f22 1893 1894 fmuld %f38,%f30,%f30 1895 1896 fsubd %f6,%f4,%f6 1897 1898 faddd %f14,%f12,%f14 1899 1900 faddd %f22,%f20,%f22 1901 1902 faddd %f30,%f28,%f30 1903 1904 faddd %f6,%f0,%f6 1905 1906 faddd %f14,%f8,%f14 1907 1908 faddd %f22,%f16,%f22 1909 1910 faddd %f30,%f24,%f30 1911 mov %l0,%l4 1912 1913 fnegd %f6,%f4 1914 lda [%i1]%asi,%l0 ! preload next argument 1915 1916 fnegd %f14,%f12 1917 lda [%i1]%asi,%f0 1918 1919 fnegd %f22,%f20 1920 lda [%i1+4]%asi,%f3 1921 1922 fnegd %f30,%f28 1923 andn %l0,%i5,%l0 1924 add %i1,%i2,%i1 1925 1926 andcc %l4,2,%g0 1927 fmovdnz %icc,%f4,%f6 1928 st %f6,[%o0] 1929 1930 andcc %l1,2,%g0 1931 fmovdnz %icc,%f12,%f14 1932 st %f14,[%o1] 1933 1934 andcc %l2,2,%g0 1935 fmovdnz %icc,%f20,%f22 1936 st %f22,[%o2] 1937 1938 andcc %l3,2,%g0 1939 fmovdnz %icc,%f28,%f30 1940 st %f30,[%o3] 1941 1942 addcc %i0,-1,%i0 1943 bg,pt %icc,.loop0 1944! delay slot 1945 st %f7,[%o0+4] 1946 1947 ba,pt %icc,.end 1948! delay slot 1949 nop 1950 1951 .align 16 1952.case8: 1953 fmuld %f10,%f10,%f8 1954 andcc %l1,1,%g0 1955 bz,pn %icc,.case12 1956! delay slot 1957 fxor %f14,%f34,%f34 1958 1959 fmuld %f18,%f18,%f16 1960 andcc %l2,1,%g0 1961 bz,pn %icc,.case10 1962! delay slot 1963 fxor %f22,%f36,%f36 1964 1965 fmuld %f26,%f26,%f24 1966 andcc %l3,1,%g0 1967 bz,pn %icc,.case9 1968! delay slot 1969 fxor %f30,%f38,%f38 1970 1971 fmuld %f0,pp3,%f6 ! sin(x0) 1972 1973 faddd %f6,pp2,%f6 1974 fmuld %f0,qq2,%f4 1975 1976 fmuld %f8,qq3,%f14 ! cos(x1) 1977 1978 fmuld %f16,qq3,%f22 ! cos(x2) 1979 1980 fmuld %f24,qq3,%f30 ! 
cos(x3) 1981 1982 fmuld %f0,%f6,%f6 1983 faddd %f4,qq1,%f4 1984 1985 faddd %f14,qq2,%f14 1986 fmuld %f8,pp2,%f12 1987 1988 faddd %f22,qq2,%f22 1989 fmuld %f16,pp2,%f20 1990 1991 faddd %f30,qq2,%f30 1992 fmuld %f24,pp2,%f28 1993 1994 faddd %f6,pp1,%f6 1995 fmuld %f0,%f4,%f4 1996 add %l4,%g1,%l4 1997 1998 fmuld %f8,%f14,%f14 1999 faddd %f12,pp1,%f12 2000 2001 fmuld %f16,%f22,%f22 2002 faddd %f20,pp1,%f20 2003 2004 fmuld %f24,%f30,%f30 2005 faddd %f28,pp1,%f28 2006 2007 fmuld %f0,%f6,%f6 2008 2009 faddd %f14,qq1,%f14 2010 fmuld %f8,%f12,%f12 2011 add %l5,%g1,%l5 2012 2013 faddd %f22,qq1,%f22 2014 fmuld %f16,%f20,%f20 2015 add %l6,%g1,%l6 2016 2017 faddd %f30,qq1,%f30 2018 fmuld %f24,%f28,%f28 2019 add %l7,%g1,%l7 2020 2021 fmuld %f2,%f6,%f6 2022 ldd [%l4+8],%f0 2023 2024 fmuld %f10,%f12,%f12 2025 2026 fmuld %f18,%f20,%f20 2027 2028 fmuld %f26,%f28,%f28 2029 2030 fmuld %f0,%f4,%f4 2031 faddd %f32,%f6,%f6 2032 2033 fmuld %f8,%f14,%f14 2034 faddd %f12,%f34,%f12 2035 ldd [%l5+16],%f8 2036 2037 fmuld %f16,%f22,%f22 2038 faddd %f20,%f36,%f20 2039 ldd [%l6+16],%f16 2040 2041 fmuld %f24,%f30,%f30 2042 faddd %f28,%f38,%f28 2043 ldd [%l7+16],%f24 2044 2045 faddd %f2,%f6,%f6 2046 ldd [%l4+16],%f32 2047 2048 fmuld %f8,%f14,%f14 2049 faddd %f12,%f10,%f12 2050 ldd [%l5+8],%f34 2051 2052 fmuld %f16,%f22,%f22 2053 faddd %f20,%f18,%f20 2054 ldd [%l6+8],%f36 2055 2056 fmuld %f24,%f30,%f30 2057 faddd %f28,%f26,%f28 2058 ldd [%l7+8],%f38 2059 2060 fmuld %f32,%f6,%f6 2061 2062 fmuld %f34,%f12,%f12 2063 2064 fmuld %f36,%f20,%f20 2065 2066 fmuld %f38,%f28,%f28 2067 2068 faddd %f6,%f4,%f6 2069 2070 fsubd %f14,%f12,%f14 2071 2072 fsubd %f22,%f20,%f22 2073 2074 fsubd %f30,%f28,%f30 2075 2076 faddd %f6,%f0,%f6 2077 2078 faddd %f14,%f8,%f14 2079 2080 faddd %f22,%f16,%f22 2081 2082 faddd %f30,%f24,%f30 2083 mov %l0,%l4 2084 2085 fnegd %f6,%f4 2086 lda [%i1]%asi,%l0 ! 
preload next argument 2087 2088 fnegd %f14,%f12 2089 lda [%i1]%asi,%f0 2090 2091 fnegd %f22,%f20 2092 lda [%i1+4]%asi,%f3 2093 2094 fnegd %f30,%f28 2095 andn %l0,%i5,%l0 2096 add %i1,%i2,%i1 2097 2098 andcc %l4,2,%g0 2099 fmovdnz %icc,%f4,%f6 2100 st %f6,[%o0] 2101 2102 andcc %l1,2,%g0 2103 fmovdnz %icc,%f12,%f14 2104 st %f14,[%o1] 2105 2106 andcc %l2,2,%g0 2107 fmovdnz %icc,%f20,%f22 2108 st %f22,[%o2] 2109 2110 andcc %l3,2,%g0 2111 fmovdnz %icc,%f28,%f30 2112 st %f30,[%o3] 2113 2114 addcc %i0,-1,%i0 2115 bg,pt %icc,.loop0 2116! delay slot 2117 st %f7,[%o0+4] 2118 2119 ba,pt %icc,.end 2120! delay slot 2121 nop 2122 2123 .align 16 2124.case9: 2125 fmuld %f0,pp3,%f6 ! sin(x0) 2126 2127 fmuld %f24,pp3,%f30 ! sin(x3) 2128 2129 faddd %f6,pp2,%f6 2130 fmuld %f0,qq2,%f4 2131 2132 fmuld %f8,qq3,%f14 ! cos(x1) 2133 2134 fmuld %f16,qq3,%f22 ! cos(x2) 2135 2136 faddd %f30,pp2,%f30 2137 fmuld %f24,qq2,%f28 2138 2139 fmuld %f0,%f6,%f6 2140 faddd %f4,qq1,%f4 2141 2142 faddd %f14,qq2,%f14 2143 fmuld %f8,pp2,%f12 2144 2145 faddd %f22,qq2,%f22 2146 fmuld %f16,pp2,%f20 2147 2148 fmuld %f24,%f30,%f30 2149 faddd %f28,qq1,%f28 2150 2151 faddd %f6,pp1,%f6 2152 fmuld %f0,%f4,%f4 2153 add %l4,%g1,%l4 2154 2155 fmuld %f8,%f14,%f14 2156 faddd %f12,pp1,%f12 2157 2158 fmuld %f16,%f22,%f22 2159 faddd %f20,pp1,%f20 2160 2161 faddd %f30,pp1,%f30 2162 fmuld %f24,%f28,%f28 2163 add %l7,%g1,%l7 2164 2165 fmuld %f0,%f6,%f6 2166 2167 faddd %f14,qq1,%f14 2168 fmuld %f8,%f12,%f12 2169 add %l5,%g1,%l5 2170 2171 faddd %f22,qq1,%f22 2172 fmuld %f16,%f20,%f20 2173 add %l6,%g1,%l6 2174 2175 fmuld %f24,%f30,%f30 2176 2177 fmuld %f2,%f6,%f6 2178 ldd [%l4+8],%f0 2179 2180 fmuld %f10,%f12,%f12 2181 2182 fmuld %f18,%f20,%f20 2183 2184 fmuld %f26,%f30,%f30 2185 ldd [%l7+8],%f24 2186 2187 fmuld %f0,%f4,%f4 2188 faddd %f32,%f6,%f6 2189 2190 fmuld %f8,%f14,%f14 2191 faddd %f12,%f34,%f12 2192 ldd [%l5+16],%f8 2193 2194 fmuld %f16,%f22,%f22 2195 faddd %f20,%f36,%f20 2196 ldd [%l6+16],%f16 2197 2198 fmuld 
%f24,%f28,%f28 2199 faddd %f38,%f30,%f30 2200 2201 faddd %f2,%f6,%f6 2202 ldd [%l4+16],%f32 2203 2204 fmuld %f8,%f14,%f14 2205 faddd %f12,%f10,%f12 2206 ldd [%l5+8],%f34 2207 2208 fmuld %f16,%f22,%f22 2209 faddd %f20,%f18,%f20 2210 ldd [%l6+8],%f36 2211 2212 faddd %f26,%f30,%f30 2213 ldd [%l7+16],%f38 2214 2215 fmuld %f32,%f6,%f6 2216 2217 fmuld %f34,%f12,%f12 2218 2219 fmuld %f36,%f20,%f20 2220 2221 fmuld %f38,%f30,%f30 2222 2223 faddd %f6,%f4,%f6 2224 2225 fsubd %f14,%f12,%f14 2226 2227 fsubd %f22,%f20,%f22 2228 2229 faddd %f30,%f28,%f30 2230 2231 faddd %f6,%f0,%f6 2232 2233 faddd %f14,%f8,%f14 2234 2235 faddd %f22,%f16,%f22 2236 2237 faddd %f30,%f24,%f30 2238 mov %l0,%l4 2239 2240 fnegd %f6,%f4 2241 lda [%i1]%asi,%l0 ! preload next argument 2242 2243 fnegd %f14,%f12 2244 lda [%i1]%asi,%f0 2245 2246 fnegd %f22,%f20 2247 lda [%i1+4]%asi,%f3 2248 2249 fnegd %f30,%f28 2250 andn %l0,%i5,%l0 2251 add %i1,%i2,%i1 2252 2253 andcc %l4,2,%g0 2254 fmovdnz %icc,%f4,%f6 2255 st %f6,[%o0] 2256 2257 andcc %l1,2,%g0 2258 fmovdnz %icc,%f12,%f14 2259 st %f14,[%o1] 2260 2261 andcc %l2,2,%g0 2262 fmovdnz %icc,%f20,%f22 2263 st %f22,[%o2] 2264 2265 andcc %l3,2,%g0 2266 fmovdnz %icc,%f28,%f30 2267 st %f30,[%o3] 2268 2269 addcc %i0,-1,%i0 2270 bg,pt %icc,.loop0 2271! delay slot 2272 st %f7,[%o0+4] 2273 2274 ba,pt %icc,.end 2275! delay slot 2276 nop 2277 2278 .align 16 2279.case10: 2280 fmuld %f26,%f26,%f24 2281 andcc %l3,1,%g0 2282 bz,pn %icc,.case11 2283! delay slot 2284 fxor %f30,%f38,%f38 2285 2286 fmuld %f0,pp3,%f6 ! sin(x0) 2287 2288 fmuld %f16,pp3,%f22 ! sin(x2) 2289 2290 faddd %f6,pp2,%f6 2291 fmuld %f0,qq2,%f4 2292 2293 fmuld %f8,qq3,%f14 ! cos(x1) 2294 2295 faddd %f22,pp2,%f22 2296 fmuld %f16,qq2,%f20 2297 2298 fmuld %f24,qq3,%f30 ! 
cos(x3) 2299 2300 fmuld %f0,%f6,%f6 2301 faddd %f4,qq1,%f4 2302 2303 faddd %f14,qq2,%f14 2304 fmuld %f8,pp2,%f12 2305 2306 fmuld %f16,%f22,%f22 2307 faddd %f20,qq1,%f20 2308 2309 faddd %f30,qq2,%f30 2310 fmuld %f24,pp2,%f28 2311 2312 faddd %f6,pp1,%f6 2313 fmuld %f0,%f4,%f4 2314 add %l4,%g1,%l4 2315 2316 fmuld %f8,%f14,%f14 2317 faddd %f12,pp1,%f12 2318 2319 faddd %f22,pp1,%f22 2320 fmuld %f16,%f20,%f20 2321 add %l6,%g1,%l6 2322 2323 fmuld %f24,%f30,%f30 2324 faddd %f28,pp1,%f28 2325 2326 fmuld %f0,%f6,%f6 2327 2328 faddd %f14,qq1,%f14 2329 fmuld %f8,%f12,%f12 2330 add %l5,%g1,%l5 2331 2332 fmuld %f16,%f22,%f22 2333 2334 faddd %f30,qq1,%f30 2335 fmuld %f24,%f28,%f28 2336 add %l7,%g1,%l7 2337 2338 fmuld %f2,%f6,%f6 2339 ldd [%l4+8],%f0 2340 2341 fmuld %f10,%f12,%f12 2342 2343 fmuld %f18,%f22,%f22 2344 ldd [%l6+8],%f16 2345 2346 fmuld %f26,%f28,%f28 2347 2348 fmuld %f0,%f4,%f4 2349 faddd %f32,%f6,%f6 2350 2351 fmuld %f8,%f14,%f14 2352 faddd %f12,%f34,%f12 2353 ldd [%l5+16],%f8 2354 2355 fmuld %f16,%f20,%f20 2356 faddd %f36,%f22,%f22 2357 2358 fmuld %f24,%f30,%f30 2359 faddd %f28,%f38,%f28 2360 ldd [%l7+16],%f24 2361 2362 faddd %f2,%f6,%f6 2363 ldd [%l4+16],%f32 2364 2365 fmuld %f8,%f14,%f14 2366 faddd %f12,%f10,%f12 2367 ldd [%l5+8],%f34 2368 2369 faddd %f18,%f22,%f22 2370 ldd [%l6+16],%f36 2371 2372 fmuld %f24,%f30,%f30 2373 faddd %f28,%f26,%f28 2374 ldd [%l7+8],%f38 2375 2376 fmuld %f32,%f6,%f6 2377 2378 fmuld %f34,%f12,%f12 2379 2380 fmuld %f36,%f22,%f22 2381 2382 fmuld %f38,%f28,%f28 2383 2384 faddd %f6,%f4,%f6 2385 2386 fsubd %f14,%f12,%f14 2387 2388 faddd %f22,%f20,%f22 2389 2390 fsubd %f30,%f28,%f30 2391 2392 faddd %f6,%f0,%f6 2393 2394 faddd %f14,%f8,%f14 2395 2396 faddd %f22,%f16,%f22 2397 2398 faddd %f30,%f24,%f30 2399 mov %l0,%l4 2400 2401 fnegd %f6,%f4 2402 lda [%i1]%asi,%l0 ! 
preload next argument 2403 2404 fnegd %f14,%f12 2405 lda [%i1]%asi,%f0 2406 2407 fnegd %f22,%f20 2408 lda [%i1+4]%asi,%f3 2409 2410 fnegd %f30,%f28 2411 andn %l0,%i5,%l0 2412 add %i1,%i2,%i1 2413 2414 andcc %l4,2,%g0 2415 fmovdnz %icc,%f4,%f6 2416 st %f6,[%o0] 2417 2418 andcc %l1,2,%g0 2419 fmovdnz %icc,%f12,%f14 2420 st %f14,[%o1] 2421 2422 andcc %l2,2,%g0 2423 fmovdnz %icc,%f20,%f22 2424 st %f22,[%o2] 2425 2426 andcc %l3,2,%g0 2427 fmovdnz %icc,%f28,%f30 2428 st %f30,[%o3] 2429 2430 addcc %i0,-1,%i0 2431 bg,pt %icc,.loop0 2432! delay slot 2433 st %f7,[%o0+4] 2434 2435 ba,pt %icc,.end 2436! delay slot 2437 nop 2438 2439 .align 16 2440.case11: 2441 fmuld %f0,pp3,%f6 ! sin(x0) 2442 2443 fmuld %f16,pp3,%f22 ! sin(x2) 2444 2445 fmuld %f24,pp3,%f30 ! sin(x3) 2446 2447 faddd %f6,pp2,%f6 2448 fmuld %f0,qq2,%f4 2449 2450 fmuld %f8,qq3,%f14 ! cos(x1) 2451 2452 faddd %f22,pp2,%f22 2453 fmuld %f16,qq2,%f20 2454 2455 faddd %f30,pp2,%f30 2456 fmuld %f24,qq2,%f28 2457 2458 fmuld %f0,%f6,%f6 2459 faddd %f4,qq1,%f4 2460 2461 faddd %f14,qq2,%f14 2462 fmuld %f8,pp2,%f12 2463 2464 fmuld %f16,%f22,%f22 2465 faddd %f20,qq1,%f20 2466 2467 fmuld %f24,%f30,%f30 2468 faddd %f28,qq1,%f28 2469 2470 faddd %f6,pp1,%f6 2471 fmuld %f0,%f4,%f4 2472 add %l4,%g1,%l4 2473 2474 fmuld %f8,%f14,%f14 2475 faddd %f12,pp1,%f12 2476 2477 faddd %f22,pp1,%f22 2478 fmuld %f16,%f20,%f20 2479 add %l6,%g1,%l6 2480 2481 faddd %f30,pp1,%f30 2482 fmuld %f24,%f28,%f28 2483 add %l7,%g1,%l7 2484 2485 fmuld %f0,%f6,%f6 2486 2487 faddd %f14,qq1,%f14 2488 fmuld %f8,%f12,%f12 2489 add %l5,%g1,%l5 2490 2491 fmuld %f16,%f22,%f22 2492 2493 fmuld %f24,%f30,%f30 2494 2495 fmuld %f2,%f6,%f6 2496 ldd [%l4+8],%f0 2497 2498 fmuld %f10,%f12,%f12 2499 2500 fmuld %f18,%f22,%f22 2501 ldd [%l6+8],%f16 2502 2503 fmuld %f26,%f30,%f30 2504 ldd [%l7+8],%f24 2505 2506 fmuld %f0,%f4,%f4 2507 faddd %f32,%f6,%f6 2508 2509 fmuld %f8,%f14,%f14 2510 faddd %f12,%f34,%f12 2511 ldd [%l5+16],%f8 2512 2513 fmuld %f16,%f20,%f20 2514 faddd 
%f36,%f22,%f22 2515 2516 fmuld %f24,%f28,%f28 2517 faddd %f38,%f30,%f30 2518 2519 faddd %f2,%f6,%f6 2520 ldd [%l4+16],%f32 2521 2522 fmuld %f8,%f14,%f14 2523 faddd %f12,%f10,%f12 2524 ldd [%l5+8],%f34 2525 2526 faddd %f18,%f22,%f22 2527 ldd [%l6+16],%f36 2528 2529 faddd %f26,%f30,%f30 2530 ldd [%l7+16],%f38 2531 2532 fmuld %f32,%f6,%f6 2533 2534 fmuld %f34,%f12,%f12 2535 2536 fmuld %f36,%f22,%f22 2537 2538 fmuld %f38,%f30,%f30 2539 2540 faddd %f6,%f4,%f6 2541 2542 fsubd %f14,%f12,%f14 2543 2544 faddd %f22,%f20,%f22 2545 2546 faddd %f30,%f28,%f30 2547 2548 faddd %f6,%f0,%f6 2549 2550 faddd %f14,%f8,%f14 2551 2552 faddd %f22,%f16,%f22 2553 2554 faddd %f30,%f24,%f30 2555 mov %l0,%l4 2556 2557 fnegd %f6,%f4 2558 lda [%i1]%asi,%l0 ! preload next argument 2559 2560 fnegd %f14,%f12 2561 lda [%i1]%asi,%f0 2562 2563 fnegd %f22,%f20 2564 lda [%i1+4]%asi,%f3 2565 2566 fnegd %f30,%f28 2567 andn %l0,%i5,%l0 2568 add %i1,%i2,%i1 2569 2570 andcc %l4,2,%g0 2571 fmovdnz %icc,%f4,%f6 2572 st %f6,[%o0] 2573 2574 andcc %l1,2,%g0 2575 fmovdnz %icc,%f12,%f14 2576 st %f14,[%o1] 2577 2578 andcc %l2,2,%g0 2579 fmovdnz %icc,%f20,%f22 2580 st %f22,[%o2] 2581 2582 andcc %l3,2,%g0 2583 fmovdnz %icc,%f28,%f30 2584 st %f30,[%o3] 2585 2586 addcc %i0,-1,%i0 2587 bg,pt %icc,.loop0 2588! delay slot 2589 st %f7,[%o0+4] 2590 2591 ba,pt %icc,.end 2592! delay slot 2593 nop 2594 2595 .align 16 2596.case12: 2597 fmuld %f18,%f18,%f16 2598 andcc %l2,1,%g0 2599 bz,pn %icc,.case14 2600! delay slot 2601 fxor %f22,%f36,%f36 2602 2603 fmuld %f26,%f26,%f24 2604 andcc %l3,1,%g0 2605 bz,pn %icc,.case13 2606! delay slot 2607 fxor %f30,%f38,%f38 2608 2609 fmuld %f0,pp3,%f6 ! sin(x0) 2610 2611 fmuld %f8,pp3,%f14 ! sin(x1) 2612 2613 faddd %f6,pp2,%f6 2614 fmuld %f0,qq2,%f4 2615 2616 faddd %f14,pp2,%f14 2617 fmuld %f8,qq2,%f12 2618 2619 fmuld %f16,qq3,%f22 ! cos(x2) 2620 2621 fmuld %f24,qq3,%f30 ! 
cos(x3) 2622 2623 fmuld %f0,%f6,%f6 2624 faddd %f4,qq1,%f4 2625 2626 fmuld %f8,%f14,%f14 2627 faddd %f12,qq1,%f12 2628 2629 faddd %f22,qq2,%f22 2630 fmuld %f16,pp2,%f20 2631 2632 faddd %f30,qq2,%f30 2633 fmuld %f24,pp2,%f28 2634 2635 faddd %f6,pp1,%f6 2636 fmuld %f0,%f4,%f4 2637 add %l4,%g1,%l4 2638 2639 faddd %f14,pp1,%f14 2640 fmuld %f8,%f12,%f12 2641 add %l5,%g1,%l5 2642 2643 fmuld %f16,%f22,%f22 2644 faddd %f20,pp1,%f20 2645 2646 fmuld %f24,%f30,%f30 2647 faddd %f28,pp1,%f28 2648 2649 fmuld %f0,%f6,%f6 2650 2651 fmuld %f8,%f14,%f14 2652 2653 faddd %f22,qq1,%f22 2654 fmuld %f16,%f20,%f20 2655 add %l6,%g1,%l6 2656 2657 faddd %f30,qq1,%f30 2658 fmuld %f24,%f28,%f28 2659 add %l7,%g1,%l7 2660 2661 fmuld %f2,%f6,%f6 2662 ldd [%l4+8],%f0 2663 2664 fmuld %f10,%f14,%f14 2665 ldd [%l5+8],%f8 2666 2667 fmuld %f18,%f20,%f20 2668 2669 fmuld %f26,%f28,%f28 2670 2671 fmuld %f0,%f4,%f4 2672 faddd %f32,%f6,%f6 2673 2674 fmuld %f8,%f12,%f12 2675 faddd %f34,%f14,%f14 2676 2677 fmuld %f16,%f22,%f22 2678 faddd %f20,%f36,%f20 2679 ldd [%l6+16],%f16 2680 2681 fmuld %f24,%f30,%f30 2682 faddd %f28,%f38,%f28 2683 ldd [%l7+16],%f24 2684 2685 faddd %f2,%f6,%f6 2686 ldd [%l4+16],%f32 2687 2688 faddd %f10,%f14,%f14 2689 ldd [%l5+16],%f34 2690 2691 fmuld %f16,%f22,%f22 2692 faddd %f20,%f18,%f20 2693 ldd [%l6+8],%f36 2694 2695 fmuld %f24,%f30,%f30 2696 faddd %f28,%f26,%f28 2697 ldd [%l7+8],%f38 2698 2699 fmuld %f32,%f6,%f6 2700 2701 fmuld %f34,%f14,%f14 2702 2703 fmuld %f36,%f20,%f20 2704 2705 fmuld %f38,%f28,%f28 2706 2707 faddd %f6,%f4,%f6 2708 2709 faddd %f14,%f12,%f14 2710 2711 fsubd %f22,%f20,%f22 2712 2713 fsubd %f30,%f28,%f30 2714 2715 faddd %f6,%f0,%f6 2716 2717 faddd %f14,%f8,%f14 2718 2719 faddd %f22,%f16,%f22 2720 2721 faddd %f30,%f24,%f30 2722 mov %l0,%l4 2723 2724 fnegd %f6,%f4 2725 lda [%i1]%asi,%l0 ! 
preload next argument 2726 2727 fnegd %f14,%f12 2728 lda [%i1]%asi,%f0 2729 2730 fnegd %f22,%f20 2731 lda [%i1+4]%asi,%f3 2732 2733 fnegd %f30,%f28 2734 andn %l0,%i5,%l0 2735 add %i1,%i2,%i1 2736 2737 andcc %l4,2,%g0 2738 fmovdnz %icc,%f4,%f6 2739 st %f6,[%o0] 2740 2741 andcc %l1,2,%g0 2742 fmovdnz %icc,%f12,%f14 2743 st %f14,[%o1] 2744 2745 andcc %l2,2,%g0 2746 fmovdnz %icc,%f20,%f22 2747 st %f22,[%o2] 2748 2749 andcc %l3,2,%g0 2750 fmovdnz %icc,%f28,%f30 2751 st %f30,[%o3] 2752 2753 addcc %i0,-1,%i0 2754 bg,pt %icc,.loop0 2755! delay slot 2756 st %f7,[%o0+4] 2757 2758 ba,pt %icc,.end 2759! delay slot 2760 nop 2761 2762 .align 16 2763.case13: 2764 fmuld %f0,pp3,%f6 ! sin(x0) 2765 2766 fmuld %f8,pp3,%f14 ! sin(x1) 2767 2768 fmuld %f24,pp3,%f30 ! sin(x3) 2769 2770 faddd %f6,pp2,%f6 2771 fmuld %f0,qq2,%f4 2772 2773 faddd %f14,pp2,%f14 2774 fmuld %f8,qq2,%f12 2775 2776 fmuld %f16,qq3,%f22 ! cos(x2) 2777 2778 faddd %f30,pp2,%f30 2779 fmuld %f24,qq2,%f28 2780 2781 fmuld %f0,%f6,%f6 2782 faddd %f4,qq1,%f4 2783 2784 fmuld %f8,%f14,%f14 2785 faddd %f12,qq1,%f12 2786 2787 faddd %f22,qq2,%f22 2788 fmuld %f16,pp2,%f20 2789 2790 fmuld %f24,%f30,%f30 2791 faddd %f28,qq1,%f28 2792 2793 faddd %f6,pp1,%f6 2794 fmuld %f0,%f4,%f4 2795 add %l4,%g1,%l4 2796 2797 faddd %f14,pp1,%f14 2798 fmuld %f8,%f12,%f12 2799 add %l5,%g1,%l5 2800 2801 fmuld %f16,%f22,%f22 2802 faddd %f20,pp1,%f20 2803 2804 faddd %f30,pp1,%f30 2805 fmuld %f24,%f28,%f28 2806 add %l7,%g1,%l7 2807 2808 fmuld %f0,%f6,%f6 2809 2810 fmuld %f8,%f14,%f14 2811 2812 faddd %f22,qq1,%f22 2813 fmuld %f16,%f20,%f20 2814 add %l6,%g1,%l6 2815 2816 fmuld %f24,%f30,%f30 2817 2818 fmuld %f2,%f6,%f6 2819 ldd [%l4+8],%f0 2820 2821 fmuld %f10,%f14,%f14 2822 ldd [%l5+8],%f8 2823 2824 fmuld %f18,%f20,%f20 2825 2826 fmuld %f26,%f30,%f30 2827 ldd [%l7+8],%f24 2828 2829 fmuld %f0,%f4,%f4 2830 faddd %f32,%f6,%f6 2831 2832 fmuld %f8,%f12,%f12 2833 faddd %f34,%f14,%f14 2834 2835 fmuld %f16,%f22,%f22 2836 faddd %f20,%f36,%f20 2837 ldd 
[%l6+16],%f16 2838 2839 fmuld %f24,%f28,%f28 2840 faddd %f38,%f30,%f30 2841 2842 faddd %f2,%f6,%f6 2843 ldd [%l4+16],%f32 2844 2845 faddd %f10,%f14,%f14 2846 ldd [%l5+16],%f34 2847 2848 fmuld %f16,%f22,%f22 2849 faddd %f20,%f18,%f20 2850 ldd [%l6+8],%f36 2851 2852 faddd %f26,%f30,%f30 2853 ldd [%l7+16],%f38 2854 2855 fmuld %f32,%f6,%f6 2856 2857 fmuld %f34,%f14,%f14 2858 2859 fmuld %f36,%f20,%f20 2860 2861 fmuld %f38,%f30,%f30 2862 2863 faddd %f6,%f4,%f6 2864 2865 faddd %f14,%f12,%f14 2866 2867 fsubd %f22,%f20,%f22 2868 2869 faddd %f30,%f28,%f30 2870 2871 faddd %f6,%f0,%f6 2872 2873 faddd %f14,%f8,%f14 2874 2875 faddd %f22,%f16,%f22 2876 2877 faddd %f30,%f24,%f30 2878 mov %l0,%l4 2879 2880 fnegd %f6,%f4 2881 lda [%i1]%asi,%l0 ! preload next argument 2882 2883 fnegd %f14,%f12 2884 lda [%i1]%asi,%f0 2885 2886 fnegd %f22,%f20 2887 lda [%i1+4]%asi,%f3 2888 2889 fnegd %f30,%f28 2890 andn %l0,%i5,%l0 2891 add %i1,%i2,%i1 2892 2893 andcc %l4,2,%g0 2894 fmovdnz %icc,%f4,%f6 2895 st %f6,[%o0] 2896 2897 andcc %l1,2,%g0 2898 fmovdnz %icc,%f12,%f14 2899 st %f14,[%o1] 2900 2901 andcc %l2,2,%g0 2902 fmovdnz %icc,%f20,%f22 2903 st %f22,[%o2] 2904 2905 andcc %l3,2,%g0 2906 fmovdnz %icc,%f28,%f30 2907 st %f30,[%o3] 2908 2909 addcc %i0,-1,%i0 2910 bg,pt %icc,.loop0 2911! delay slot 2912 st %f7,[%o0+4] 2913 2914 ba,pt %icc,.end 2915! delay slot 2916 nop 2917 2918 .align 16 2919.case14: 2920 fmuld %f26,%f26,%f24 2921 andcc %l3,1,%g0 2922 bz,pn %icc,.case15 2923! delay slot 2924 fxor %f30,%f38,%f38 2925 2926 fmuld %f0,pp3,%f6 ! sin(x0) 2927 2928 fmuld %f8,pp3,%f14 ! sin(x1) 2929 2930 fmuld %f16,pp3,%f22 ! sin(x2) 2931 2932 faddd %f6,pp2,%f6 2933 fmuld %f0,qq2,%f4 2934 2935 faddd %f14,pp2,%f14 2936 fmuld %f8,qq2,%f12 2937 2938 faddd %f22,pp2,%f22 2939 fmuld %f16,qq2,%f20 2940 2941 fmuld %f24,qq3,%f30 ! 
cos(x3) 2942 2943 fmuld %f0,%f6,%f6 2944 faddd %f4,qq1,%f4 2945 2946 fmuld %f8,%f14,%f14 2947 faddd %f12,qq1,%f12 2948 2949 fmuld %f16,%f22,%f22 2950 faddd %f20,qq1,%f20 2951 2952 faddd %f30,qq2,%f30 2953 fmuld %f24,pp2,%f28 2954 2955 faddd %f6,pp1,%f6 2956 fmuld %f0,%f4,%f4 2957 add %l4,%g1,%l4 2958 2959 faddd %f14,pp1,%f14 2960 fmuld %f8,%f12,%f12 2961 add %l5,%g1,%l5 2962 2963 faddd %f22,pp1,%f22 2964 fmuld %f16,%f20,%f20 2965 add %l6,%g1,%l6 2966 2967 fmuld %f24,%f30,%f30 2968 faddd %f28,pp1,%f28 2969 2970 fmuld %f0,%f6,%f6 2971 2972 fmuld %f8,%f14,%f14 2973 2974 fmuld %f16,%f22,%f22 2975 2976 faddd %f30,qq1,%f30 2977 fmuld %f24,%f28,%f28 2978 add %l7,%g1,%l7 2979 2980 fmuld %f2,%f6,%f6 2981 ldd [%l4+8],%f0 2982 2983 fmuld %f10,%f14,%f14 2984 ldd [%l5+8],%f8 2985 2986 fmuld %f18,%f22,%f22 2987 ldd [%l6+8],%f16 2988 2989 fmuld %f26,%f28,%f28 2990 2991 fmuld %f0,%f4,%f4 2992 faddd %f32,%f6,%f6 2993 2994 fmuld %f8,%f12,%f12 2995 faddd %f34,%f14,%f14 2996 2997 fmuld %f16,%f20,%f20 2998 faddd %f36,%f22,%f22 2999 3000 fmuld %f24,%f30,%f30 3001 faddd %f28,%f38,%f28 3002 ldd [%l7+16],%f24 3003 3004 faddd %f2,%f6,%f6 3005 ldd [%l4+16],%f32 3006 3007 faddd %f10,%f14,%f14 3008 ldd [%l5+16],%f34 3009 3010 faddd %f18,%f22,%f22 3011 ldd [%l6+16],%f36 3012 3013 fmuld %f24,%f30,%f30 3014 faddd %f28,%f26,%f28 3015 ldd [%l7+8],%f38 3016 3017 fmuld %f32,%f6,%f6 3018 3019 fmuld %f34,%f14,%f14 3020 3021 fmuld %f36,%f22,%f22 3022 3023 fmuld %f38,%f28,%f28 3024 3025 faddd %f6,%f4,%f6 3026 3027 faddd %f14,%f12,%f14 3028 3029 faddd %f22,%f20,%f22 3030 3031 fsubd %f30,%f28,%f30 3032 3033 faddd %f6,%f0,%f6 3034 3035 faddd %f14,%f8,%f14 3036 3037 faddd %f22,%f16,%f22 3038 3039 faddd %f30,%f24,%f30 3040 mov %l0,%l4 3041 3042 fnegd %f6,%f4 3043 lda [%i1]%asi,%l0 ! 
preload next argument 3044 3045 fnegd %f14,%f12 3046 lda [%i1]%asi,%f0 3047 3048 fnegd %f22,%f20 3049 lda [%i1+4]%asi,%f3 3050 3051 fnegd %f30,%f28 3052 andn %l0,%i5,%l0 3053 add %i1,%i2,%i1 3054 3055 andcc %l4,2,%g0 3056 fmovdnz %icc,%f4,%f6 3057 st %f6,[%o0] 3058 3059 andcc %l1,2,%g0 3060 fmovdnz %icc,%f12,%f14 3061 st %f14,[%o1] 3062 3063 andcc %l2,2,%g0 3064 fmovdnz %icc,%f20,%f22 3065 st %f22,[%o2] 3066 3067 andcc %l3,2,%g0 3068 fmovdnz %icc,%f28,%f30 3069 st %f30,[%o3] 3070 3071 addcc %i0,-1,%i0 3072 bg,pt %icc,.loop0 3073! delay slot 3074 st %f7,[%o0+4] 3075 3076 ba,pt %icc,.end 3077! delay slot 3078 nop 3079 3080 .align 16 3081.case15: 3082 fmuld %f0,pp3,%f6 ! sin(x0) 3083 3084 fmuld %f8,pp3,%f14 ! sin(x1) 3085 3086 fmuld %f16,pp3,%f22 ! sin(x2) 3087 3088 fmuld %f24,pp3,%f30 ! sin(x3) 3089 3090 faddd %f6,pp2,%f6 3091 fmuld %f0,qq2,%f4 3092 3093 faddd %f14,pp2,%f14 3094 fmuld %f8,qq2,%f12 3095 3096 faddd %f22,pp2,%f22 3097 fmuld %f16,qq2,%f20 3098 3099 faddd %f30,pp2,%f30 3100 fmuld %f24,qq2,%f28 3101 3102 fmuld %f0,%f6,%f6 3103 faddd %f4,qq1,%f4 3104 3105 fmuld %f8,%f14,%f14 3106 faddd %f12,qq1,%f12 3107 3108 fmuld %f16,%f22,%f22 3109 faddd %f20,qq1,%f20 3110 3111 fmuld %f24,%f30,%f30 3112 faddd %f28,qq1,%f28 3113 3114 faddd %f6,pp1,%f6 3115 fmuld %f0,%f4,%f4 3116 add %l4,%g1,%l4 3117 3118 faddd %f14,pp1,%f14 3119 fmuld %f8,%f12,%f12 3120 add %l5,%g1,%l5 3121 3122 faddd %f22,pp1,%f22 3123 fmuld %f16,%f20,%f20 3124 add %l6,%g1,%l6 3125 3126 faddd %f30,pp1,%f30 3127 fmuld %f24,%f28,%f28 3128 add %l7,%g1,%l7 3129 3130 fmuld %f0,%f6,%f6 3131 3132 fmuld %f8,%f14,%f14 3133 3134 fmuld %f16,%f22,%f22 3135 3136 fmuld %f24,%f30,%f30 3137 3138 fmuld %f2,%f6,%f6 3139 ldd [%l4+8],%f0 3140 3141 fmuld %f10,%f14,%f14 3142 ldd [%l5+8],%f8 3143 3144 fmuld %f18,%f22,%f22 3145 ldd [%l6+8],%f16 3146 3147 fmuld %f26,%f30,%f30 3148 ldd [%l7+8],%f24 3149 3150 fmuld %f0,%f4,%f4 3151 faddd %f32,%f6,%f6 3152 3153 fmuld %f8,%f12,%f12 3154 faddd %f34,%f14,%f14 3155 3156 fmuld 
%f16,%f20,%f20 3157 faddd %f36,%f22,%f22 3158 3159 fmuld %f24,%f28,%f28 3160 faddd %f38,%f30,%f30 3161 3162 faddd %f2,%f6,%f6 3163 ldd [%l4+16],%f32 3164 3165 faddd %f10,%f14,%f14 3166 ldd [%l5+16],%f34 3167 3168 faddd %f18,%f22,%f22 3169 ldd [%l6+16],%f36 3170 3171 faddd %f26,%f30,%f30 3172 ldd [%l7+16],%f38 3173 3174 fmuld %f32,%f6,%f6 3175 3176 fmuld %f34,%f14,%f14 3177 3178 fmuld %f36,%f22,%f22 3179 3180 fmuld %f38,%f30,%f30 3181 3182 faddd %f6,%f4,%f6 3183 3184 faddd %f14,%f12,%f14 3185 3186 faddd %f22,%f20,%f22 3187 3188 faddd %f30,%f28,%f30 3189 3190 faddd %f6,%f0,%f6 3191 3192 faddd %f14,%f8,%f14 3193 3194 faddd %f22,%f16,%f22 3195 3196 faddd %f30,%f24,%f30 3197 mov %l0,%l4 3198 3199 fnegd %f6,%f4 3200 lda [%i1]%asi,%l0 ! preload next argument 3201 3202 fnegd %f14,%f12 3203 lda [%i1]%asi,%f0 3204 3205 fnegd %f22,%f20 3206 lda [%i1+4]%asi,%f3 3207 3208 fnegd %f30,%f28 3209 andn %l0,%i5,%l0 3210 add %i1,%i2,%i1 3211 3212 andcc %l4,2,%g0 3213 fmovdnz %icc,%f4,%f6 3214 st %f6,[%o0] 3215 3216 andcc %l1,2,%g0 3217 fmovdnz %icc,%f12,%f14 3218 st %f14,[%o1] 3219 3220 andcc %l2,2,%g0 3221 fmovdnz %icc,%f20,%f22 3222 st %f22,[%o2] 3223 3224 andcc %l3,2,%g0 3225 fmovdnz %icc,%f28,%f30 3226 st %f30,[%o3] 3227 3228 addcc %i0,-1,%i0 3229 bg,pt %icc,.loop0 3230! delay slot 3231 st %f7,[%o0+4] 3232 3233 ba,pt %icc,.end 3234! delay slot 3235 nop 3236 3237 3238 .align 16 3239.end: 3240 st %f15,[%o1+4] 3241 st %f23,[%o2+4] 3242 st %f31,[%o3+4] 3243 ld [%fp+biguns],%i5 3244 tst %i5 ! check for huge arguments remaining 3245 be,pt %icc,.exit 3246! delay slot 3247 nop 3248#ifdef __sparcv9 3249 ldx [%fp+xsave],%o1 3250 ldx [%fp+ysave],%o3 3251#else 3252 ld [%fp+xsave],%o1 3253 ld [%fp+ysave],%o3 3254#endif 3255 ld [%fp+nsave],%o0 3256 ld [%fp+sxsave],%o2 3257 ld [%fp+sysave],%o4 3258 sra %o2,0,%o2 ! sign-extend for V9 3259 sra %o4,0,%o4 3260 call __vlibm_vsin_big_ultra3 3261 sra %o5,0,%o5 ! 
delay slot

.exit:
	ret
	restore


! -------------------------------------------------------------------
! Tail padding: entered when the element count runs out partway
! through a group of four (see the loop bodies above).  Each .lastN
! entry point zeroes the missing argument N (both the integer high
! word %lN and the FP copy) and redirects its result pointer %oN to
! the scratch "junk" slot in the frame, so the four-way unrolled
! primary-range code can complete the final partial group without
! special cases.  The .lastN_from_rangeN entries skip the faddd/st,
! which the corresponding .rangeN path has already handled.
! NOTE(review): c3two44 is the 3*2^44 constant from the table at the
! top of the file — the magic-constant add used by the range
! reduction; confirm against the .loop0 code outside this window.
! -------------------------------------------------------------------

	.align	16
.last1:
	faddd	%f2,c3two44,%f4
	st	%f15,[%o1+4]
.last1_from_range1:
	mov	0,%l1			! pad argument 1 with zero
	fzeros	%f8
	fzero	%f10
	add	%fp,junk,%o1		! write result 1 to the junk slot
.last2:
	faddd	%f10,c3two44,%f12
	st	%f23,[%o2+4]
.last2_from_range2:
	mov	0,%l2			! pad argument 2 with zero
	fzeros	%f16
	fzero	%f18
	add	%fp,junk,%o2		! write result 2 to the junk slot
.last3:
	faddd	%f18,c3two44,%f20
	st	%f31,[%o3+4]
	st	%f5,[%fp+nk0]
	st	%f13,[%fp+nk1]
.last3_from_range3:
	mov	0,%l3			! pad argument 3 with zero
	fzeros	%f24
	fzero	%f26
	ba,pt	%icc,.cont		! rejoin the main computation
! delay slot
	add	%fp,junk,%o3		! write result 3 to the junk slot


! -------------------------------------------------------------------
! Special-argument handlers for elements 0..3.  %o4 = 0x3e400000 and
! %lN holds hx = high word of |x| (sign already cleared).  Three
! cases:
!   hx <  0x3e400000 : tiny x; sin(x) ~ x.  fdtoi raises inexact for
!                      nonzero x, then the argument itself is stored.
!   hx >= 0x7ff00000 : inf/NaN; x*0 raises invalid for inf and
!                      produces the NaN result that is stored.
!   otherwise        : huge finite argument; only set the biguns flag
!                      (no store) — .end later calls
!                      __vlibm_vsin_big_ultra3 to redo these elements.
! Each handler then advances the y pointer, shifts the pipeline state
! of the following elements down one slot, and re-enters the
! corresponding .loopN with the next argument.
! -------------------------------------------------------------------

	.align	16
.range0:
	cmp	%l0,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l0,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f0			! inf/NaN: x * 0 -> NaN (invalid)
	fmuld	%f2,%f0,%f2
	st	%f2,[%o0]
	ba,pt	%icc,2f
! delay slot
	st	%f3,[%o0+4]
1:
	fdtoi	%f2,%f4			! raise inexact if not zero
	st	%f0,[%o0]
	st	%f3,[%o0+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.end
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	andn	%l1,%i5,%l0		! hx &= ~0x80000000
	fmovs	%f8,%f0			! shift element 1 state into slot 0
	fmovs	%f11,%f3
	ba,pt	%icc,.loop0
! delay slot
	add	%i1,%i2,%i1		! x += stridex


	.align	16
.range1:
	cmp	%l1,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l1,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f8			! inf/NaN: x * 0 -> NaN (invalid)
	fmuld	%f10,%f8,%f10
	st	%f10,[%o1]
	ba,pt	%icc,2f
! delay slot
	st	%f11,[%o1+4]
1:
	fdtoi	%f10,%f12		! raise inexact if not zero
	st	%f8,[%o1]
	st	%f11,[%o1+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1_from_range1
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	andn	%l2,%i5,%l1		! hx &= ~0x80000000
	fmovs	%f16,%f8		! shift element 2 state into slot 1
	fmovs	%f19,%f11
	ba,pt	%icc,.loop1
! delay slot
	add	%i1,%i2,%i1		! x += stridex


	.align	16
.range2:
	cmp	%l2,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l2,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f16			! inf/NaN: x * 0 -> NaN (invalid)
	fmuld	%f18,%f16,%f18
	st	%f18,[%o2]
	ba,pt	%icc,2f
! delay slot
	st	%f19,[%o2+4]
1:
	fdtoi	%f18,%f20		! raise inexact if not zero
	st	%f16,[%o2]
	st	%f19,[%o2+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2_from_range2
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	andn	%l3,%i5,%l2		! hx &= ~0x80000000
	fmovs	%f24,%f16		! shift element 3 state into slot 2
	fmovs	%f27,%f19
	ba,pt	%icc,.loop2
! delay slot
	add	%i1,%i2,%i1		! x += stridex


	.align	16
.range3:
	cmp	%l3,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l3,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f24			! inf/NaN: x * 0 -> NaN (invalid)
	fmuld	%f26,%f24,%f26
	st	%f26,[%o3]
	ba,pt	%icc,2f
! delay slot
	st	%f27,[%o3+4]
1:
	fdtoi	%f26,%f28		! raise inexact if not zero
	st	%f24,[%o3]
	st	%f27,[%o3+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last3_from_range3
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! y += stridey
	ld	[%i1],%l3		! slot 3 has no preloaded successor:
	ld	[%i1],%f24		! fetch the next argument directly
	ld	[%i1+4],%f27
	andn	%l3,%i5,%l3		! hx &= ~0x80000000
	ba,pt	%icc,.loop3
! delay slot
	add	%i1,%i2,%i1		! x += stridex

	SET_SIZE(__vsin_ultra3)
