/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vcos_ultra3.S"

#include "libm.h"
#if defined(LIBMVEC_SO_BUILD)
	.weak	__vcos
	.type	__vcos,#function
	__vcos = __vcos_ultra3
#endif

! Read-only constant pool for __vcos_ultra3.  The entries are loaded in
! order into %f40..%f62 at function entry (see the c3two44..qq3 register
! #defines below); keep the layout and the load offsets in sync.

	RO_DATA
	.align	64
constants:
	.word	0x42c80000,0x00000000	! 3 * 2^44; added to |x| so the low
					! word of the sum yields the lookup
					! index k into __vlibm_TBL_sincos2
	.word	0x43380000,0x00000000	! 3 * 2^51; added to x * invpio2 so
					! the low word of the sum yields the
					! nearest integer n (medium range)
	.word	0x3fe45f30,0x6dc9c883	! invpio2 = 2/pi
	.word	0x3ff921fb,0x54442c00	! pio2_1: pi/2, leading bits
	.word	0x3d318469,0x898cc400	! pio2_2: pi/2, middle bits
	.word	0x3a71701b,0x839a2520	! pio2_3: pi/2, trailing bits
					! (pio2_1+pio2_2+pio2_3 ~ pi/2 for the
					! three-step argument reduction)
	.word	0xbfc55555,0x55555533	! pp1 \
	.word	0x3f811111,0x10e7d53b	! pp2  > sin polynomial coefficients
	.word	0xbf2a0167,0xe6b3cf9b	! pp3 /
	.word	0xbfdfffff,0xffffff65	! qq1 \
	.word	0x3fa55555,0x54f88ed0	! qq2  > cos polynomial coefficients
	.word	0xbf56c12c,0xdd185f60	! qq3 /

! 
local storage indices 55 56#define xsave STACK_BIAS-0x8 57#define ysave STACK_BIAS-0x10 58#define nsave STACK_BIAS-0x14 59#define sxsave STACK_BIAS-0x18 60#define sysave STACK_BIAS-0x1c 61#define biguns STACK_BIAS-0x20 62#define nk3 STACK_BIAS-0x24 63#define nk2 STACK_BIAS-0x28 64#define nk1 STACK_BIAS-0x2c 65#define nk0 STACK_BIAS-0x30 66#define junk STACK_BIAS-0x38 67! sizeof temp storage - must be a multiple of 16 for V9 68#define tmps 0x40 69 70! register use 71 72! i0 n 73! i1 x 74! i2 stridex 75! i3 y 76! i4 stridey 77! i5 0x80000000 78 79! l0 hx0 80! l1 hx1 81! l2 hx2 82! l3 hx3 83! l4 k0 84! l5 k1 85! l6 k2 86! l7 k3 87 88! the following are 64-bit registers in both V8+ and V9 89 90! g1 __vlibm_TBL_sincos2 91! g5 scratch 92 93! o0 py0 94! o1 py1 95! o2 py2 96! o3 py3 97! o4 0x3e400000 98! o5 0x3fe921fb,0x4099251e 99! o7 scratch 100 101! f0 hx0 102! f2 103! f4 104! f6 105! f8 hx1 106! f10 107! f12 108! f14 109! f16 hx2 110! f18 111! f20 112! f22 113! f24 hx3 114! f26 115! f28 116! f30 117! f32 118! f34 119! f36 120! f38 121 122#define c3two44 %f40 123#define c3two51 %f42 124#define invpio2 %f44 125#define pio2_1 %f46 126#define pio2_2 %f48 127#define pio2_3 %f50 128#define pp1 %f52 129#define pp2 %f54 130#define pp3 %f56 131#define qq1 %f58 132#define qq2 %f60 133#define qq3 %f62 134 135 ENTRY(__vcos_ultra3) 136 save %sp,-SA(MINFRAME)-tmps,%sp 137 PIC_SETUP(l7) 138 PIC_SET(l7,constants,o0) 139 PIC_SET(l7,__vlibm_TBL_sincos2,o1) 140 mov %o1,%g1 141 wr %g0,0x82,%asi ! set %asi for non-faulting loads 142#ifdef __sparcv9 143 stx %i1,[%fp+xsave] ! save arguments 144 stx %i3,[%fp+ysave] 145#else 146 st %i1,[%fp+xsave] ! save arguments 147 st %i3,[%fp+ysave] 148#endif 149 st %i0,[%fp+nsave] 150 st %i2,[%fp+sxsave] 151 st %i4,[%fp+sysave] 152 st %g0,[%fp+biguns] ! biguns = 0 153 ldd [%o0+0x00],c3two44 ! 
load/set up constants 154 ldd [%o0+0x08],c3two51 155 ldd [%o0+0x10],invpio2 156 ldd [%o0+0x18],pio2_1 157 ldd [%o0+0x20],pio2_2 158 ldd [%o0+0x28],pio2_3 159 ldd [%o0+0x30],pp1 160 ldd [%o0+0x38],pp2 161 ldd [%o0+0x40],pp3 162 ldd [%o0+0x48],qq1 163 ldd [%o0+0x50],qq2 164 ldd [%o0+0x58],qq3 165 sethi %hi(0x80000000),%i5 166 sethi %hi(0x3e400000),%o4 167 sethi %hi(0x3fe921fb),%o5 168 or %o5,%lo(0x3fe921fb),%o5 169 sllx %o5,32,%o5 170 sethi %hi(0x4099251e),%o7 171 or %o7,%lo(0x4099251e),%o7 172 or %o5,%o7,%o5 173 sll %i2,3,%i2 ! scale strides 174 sll %i4,3,%i4 175 add %fp,junk,%o1 ! loop prologue 176 add %fp,junk,%o2 177 add %fp,junk,%o3 178 ld [%i1],%l0 ! *x 179 ld [%i1],%f0 180 ld [%i1+4],%f3 181 andn %l0,%i5,%l0 ! mask off sign 182 add %i1,%i2,%i1 ! x += stridex 183 ba .loop0 184 nop 185 186! 16-byte aligned 187 .align 16 188.loop0: 189 lda [%i1]%asi,%l1 ! preload next argument 190 sub %l0,%o4,%g5 191 sub %o5,%l0,%o7 192 fabss %f0,%f2 193 194 lda [%i1]%asi,%f8 195 orcc %o7,%g5,%g0 196 mov %i3,%o0 ! py0 = y 197 bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e 198 199! delay slot 200 lda [%i1+4]%asi,%f11 201 addcc %i0,-1,%i0 202 add %i3,%i4,%i3 ! y += stridey 203 ble,pn %icc,.last1 204 205! delay slot 206 andn %l1,%i5,%l1 207 add %i1,%i2,%i1 ! x += stridex 208 faddd %f2,c3two44,%f4 209 st %f15,[%o1+4] 210 211.loop1: 212 lda [%i1]%asi,%l2 ! preload next argument 213 sub %l1,%o4,%g5 214 sub %o5,%l1,%o7 215 fabss %f8,%f10 216 217 lda [%i1]%asi,%f16 218 orcc %o7,%g5,%g0 219 mov %i3,%o1 ! py1 = y 220 bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e 221 222! delay slot 223 lda [%i1+4]%asi,%f19 224 addcc %i0,-1,%i0 225 add %i3,%i4,%i3 ! y += stridey 226 ble,pn %icc,.last2 227 228! delay slot 229 andn %l2,%i5,%l2 230 add %i1,%i2,%i1 ! x += stridex 231 faddd %f10,c3two44,%f12 232 st %f23,[%o2+4] 233 234.loop2: 235 lda [%i1]%asi,%l3 ! 
preload next argument 236 sub %l2,%o4,%g5 237 sub %o5,%l2,%o7 238 fabss %f16,%f18 239 240 lda [%i1]%asi,%f24 241 orcc %o7,%g5,%g0 242 mov %i3,%o2 ! py2 = y 243 bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e 244 245! delay slot 246 lda [%i1+4]%asi,%f27 247 addcc %i0,-1,%i0 248 add %i3,%i4,%i3 ! y += stridey 249 ble,pn %icc,.last3 250 251! delay slot 252 andn %l3,%i5,%l3 253 add %i1,%i2,%i1 ! x += stridex 254 faddd %f18,c3two44,%f20 255 st %f31,[%o3+4] 256 257.loop3: 258 sub %l3,%o4,%g5 259 sub %o5,%l3,%o7 260 fabss %f24,%f26 261 st %f5,[%fp+nk0] 262 263 orcc %o7,%g5,%g0 264 mov %i3,%o3 ! py3 = y 265 bl,pn %icc,.range3 ! hx < 0x3e400000 or > hx 0x4099251e 266! delay slot 267 st %f13,[%fp+nk1] 268 269!!! DONE? 270.cont: 271 srlx %o5,32,%o7 272 add %i3,%i4,%i3 ! y += stridey 273 fmovs %f3,%f1 274 st %f21,[%fp+nk2] 275 276 sub %o7,%l0,%l0 277 sub %o7,%l1,%l1 278 faddd %f26,c3two44,%f28 279 st %f29,[%fp+nk3] 280 281 sub %o7,%l2,%l2 282 sub %o7,%l3,%l3 283 fmovs %f11,%f9 284 285 or %l0,%l1,%l0 286 or %l2,%l3,%l2 287 fmovs %f19,%f17 288 289 fmovs %f27,%f25 290 fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range 291 292 fmuld %f8,invpio2,%f14 293 ld [%fp+nk0],%l4 294 295 fmuld %f16,invpio2,%f22 296 ld [%fp+nk1],%l5 297 298 orcc %l0,%l2,%g0 299 bl,pn %icc,.medium 300! delay slot 301 fmuld %f24,invpio2,%f30 302 ld [%fp+nk2],%l6 303 304 ld [%fp+nk3],%l7 305 sll %l4,5,%l4 ! k 306 fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0 307 308 sll %l5,5,%l5 309 ldd [%l4+%g1],%f4 310 fcmpd %fcc1,%f8,pio2_3 311 312 sll %l6,5,%l6 313 ldd [%l5+%g1],%f12 314 fcmpd %fcc2,%f16,pio2_3 315 316 sll %l7,5,%l7 317 ldd [%l6+%g1],%f20 318 fcmpd %fcc3,%f24,pio2_3 319 320 ldd [%l7+%g1],%f28 321 fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] 322 323 fsubd %f10,%f12,%f10 324 325 fsubd %f18,%f20,%f18 326 327 fsubd %f26,%f28,%f26 328 329 fmuld %f2,%f2,%f0 ! 
z = x * x 330 331 fmuld %f10,%f10,%f8 332 333 fmuld %f18,%f18,%f16 334 335 fmuld %f26,%f26,%f24 336 337 fmuld %f0,qq3,%f6 338 339 fmuld %f8,qq3,%f14 340 341 fmuld %f16,qq3,%f22 342 343 fmuld %f24,qq3,%f30 344 345 faddd %f6,qq2,%f6 346 fmuld %f0,pp2,%f4 347 348 faddd %f14,qq2,%f14 349 fmuld %f8,pp2,%f12 350 351 faddd %f22,qq2,%f22 352 fmuld %f16,pp2,%f20 353 354 faddd %f30,qq2,%f30 355 fmuld %f24,pp2,%f28 356 357 fmuld %f0,%f6,%f6 358 faddd %f4,pp1,%f4 359 360 fmuld %f8,%f14,%f14 361 faddd %f12,pp1,%f12 362 363 fmuld %f16,%f22,%f22 364 faddd %f20,pp1,%f20 365 366 fmuld %f24,%f30,%f30 367 faddd %f28,pp1,%f28 368 369 faddd %f6,qq1,%f6 370 fmuld %f0,%f4,%f4 371 add %l4,%g1,%l4 372 373 faddd %f14,qq1,%f14 374 fmuld %f8,%f12,%f12 375 add %l5,%g1,%l5 376 377 faddd %f22,qq1,%f22 378 fmuld %f16,%f20,%f20 379 add %l6,%g1,%l6 380 381 faddd %f30,qq1,%f30 382 fmuld %f24,%f28,%f28 383 add %l7,%g1,%l7 384 385 fmuld %f2,%f4,%f4 386 387 fmuld %f10,%f12,%f12 388 389 fmuld %f18,%f20,%f20 390 391 fmuld %f26,%f28,%f28 392 393 fmuld %f0,%f6,%f6 394 faddd %f4,%f2,%f4 395 ldd [%l4+16],%f32 396 397 fmuld %f8,%f14,%f14 398 faddd %f12,%f10,%f12 399 ldd [%l5+16],%f34 400 401 fmuld %f16,%f22,%f22 402 faddd %f20,%f18,%f20 403 ldd [%l6+16],%f36 404 405 fmuld %f24,%f30,%f30 406 faddd %f28,%f26,%f28 407 ldd [%l7+16],%f38 408 409 fmuld %f32,%f6,%f6 410 ldd [%l4+8],%f2 411 412 fmuld %f34,%f14,%f14 413 ldd [%l5+8],%f10 414 415 fmuld %f36,%f22,%f22 416 ldd [%l6+8],%f18 417 418 fmuld %f38,%f30,%f30 419 ldd [%l7+8],%f26 420 421 fmuld %f2,%f4,%f4 422 423 fmuld %f10,%f12,%f12 424 425 fmuld %f18,%f20,%f20 426 427 fmuld %f26,%f28,%f28 428 429 fsubd %f6,%f4,%f6 430 lda [%i1]%asi,%l0 ! 
preload next argument 431 432 fsubd %f14,%f12,%f14 433 lda [%i1]%asi,%f0 434 435 fsubd %f22,%f20,%f22 436 lda [%i1+4]%asi,%f3 437 438 fsubd %f30,%f28,%f30 439 andn %l0,%i5,%l0 440 add %i1,%i2,%i1 441 442 faddd %f6,%f32,%f6 443 st %f6,[%o0] 444 445 faddd %f14,%f34,%f14 446 st %f14,[%o1] 447 448 faddd %f22,%f36,%f22 449 st %f22,[%o2] 450 451 faddd %f30,%f38,%f30 452 st %f30,[%o3] 453 addcc %i0,-1,%i0 454 455 bg,pt %icc,.loop0 456! delay slot 457 st %f7,[%o0+4] 458 459 ba,pt %icc,.end 460! delay slot 461 nop 462 463 464 .align 16 465.medium: 466 faddd %f6,c3two51,%f4 467 st %f5,[%fp+nk0] 468 469 faddd %f14,c3two51,%f12 470 st %f13,[%fp+nk1] 471 472 faddd %f22,c3two51,%f20 473 st %f21,[%fp+nk2] 474 475 faddd %f30,c3two51,%f28 476 st %f29,[%fp+nk3] 477 478 fsubd %f4,c3two51,%f6 479 480 fsubd %f12,c3two51,%f14 481 482 fsubd %f20,c3two51,%f22 483 484 fsubd %f28,c3two51,%f30 485 486 fmuld %f6,pio2_1,%f2 487 ld [%fp+nk0],%l0 ! n 488 489 fmuld %f14,pio2_1,%f10 490 ld [%fp+nk1],%l1 491 492 fmuld %f22,pio2_1,%f18 493 ld [%fp+nk2],%l2 494 495 fmuld %f30,pio2_1,%f26 496 ld [%fp+nk3],%l3 497 498 fsubd %f0,%f2,%f0 499 fmuld %f6,pio2_2,%f4 500 add %l0,1,%l0 501 502 fsubd %f8,%f10,%f8 503 fmuld %f14,pio2_2,%f12 504 add %l1,1,%l1 505 506 fsubd %f16,%f18,%f16 507 fmuld %f22,pio2_2,%f20 508 add %l2,1,%l2 509 510 fsubd %f24,%f26,%f24 511 fmuld %f30,pio2_2,%f28 512 add %l3,1,%l3 513 514 fsubd %f0,%f4,%f32 515 516 fsubd %f8,%f12,%f34 517 518 fsubd %f16,%f20,%f36 519 520 fsubd %f24,%f28,%f38 521 522 fsubd %f0,%f32,%f0 523 fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0 524 525 fsubd %f8,%f34,%f8 526 fcmple32 %f34,pio2_3,%l5 527 528 fsubd %f16,%f36,%f16 529 fcmple32 %f36,pio2_3,%l6 530 531 fsubd %f24,%f38,%f24 532 fcmple32 %f38,pio2_3,%l7 533 534 fsubd %f0,%f4,%f0 535 fmuld %f6,pio2_3,%f6 536 sll %l4,30,%l4 ! 
if (x < 0) n = -n ^ 2 537 538 fsubd %f8,%f12,%f8 539 fmuld %f14,pio2_3,%f14 540 sll %l5,30,%l5 541 542 fsubd %f16,%f20,%f16 543 fmuld %f22,pio2_3,%f22 544 sll %l6,30,%l6 545 546 fsubd %f24,%f28,%f24 547 fmuld %f30,pio2_3,%f30 548 sll %l7,30,%l7 549 550 fsubd %f6,%f0,%f6 551 sra %l4,31,%l4 552 553 fsubd %f14,%f8,%f14 554 sra %l5,31,%l5 555 556 fsubd %f22,%f16,%f22 557 sra %l6,31,%l6 558 559 fsubd %f30,%f24,%f30 560 sra %l7,31,%l7 561 562 fsubd %f32,%f6,%f0 ! reduced x 563 xor %l0,%l4,%l0 564 565 fsubd %f34,%f14,%f8 566 xor %l1,%l5,%l1 567 568 fsubd %f36,%f22,%f16 569 xor %l2,%l6,%l2 570 571 fsubd %f38,%f30,%f24 572 xor %l3,%l7,%l3 573 574 fabsd %f0,%f2 575 sub %l0,%l4,%l0 576 577 fabsd %f8,%f10 578 sub %l1,%l5,%l1 579 580 fabsd %f16,%f18 581 sub %l2,%l6,%l2 582 583 fabsd %f24,%f26 584 sub %l3,%l7,%l3 585 586 faddd %f2,c3two44,%f4 587 st %f5,[%fp+nk0] 588 and %l4,2,%l4 589 590 faddd %f10,c3two44,%f12 591 st %f13,[%fp+nk1] 592 and %l5,2,%l5 593 594 faddd %f18,c3two44,%f20 595 st %f21,[%fp+nk2] 596 and %l6,2,%l6 597 598 faddd %f26,c3two44,%f28 599 st %f29,[%fp+nk3] 600 and %l7,2,%l7 601 602 fsubd %f32,%f0,%f4 603 xor %l0,%l4,%l0 604 605 fsubd %f34,%f8,%f12 606 xor %l1,%l5,%l1 607 608 fsubd %f36,%f16,%f20 609 xor %l2,%l6,%l2 610 611 fsubd %f38,%f24,%f28 612 xor %l3,%l7,%l3 613 614 fzero %f38 615 ld [%fp+nk0],%l4 616 617 fsubd %f4,%f6,%f6 ! w 618 ld [%fp+nk1],%l5 619 620 fsubd %f12,%f14,%f14 621 ld [%fp+nk2],%l6 622 623 fnegd %f38,%f38 624 ld [%fp+nk3],%l7 625 sll %l4,5,%l4 ! k 626 627 fsubd %f20,%f22,%f22 628 sll %l5,5,%l5 629 630 fsubd %f28,%f30,%f30 631 sll %l6,5,%l6 632 633 fand %f0,%f38,%f32 ! sign bit of x 634 ldd [%l4+%g1],%f4 635 sll %l7,5,%l7 636 637 fand %f8,%f38,%f34 638 ldd [%l5+%g1],%f12 639 640 fand %f16,%f38,%f36 641 ldd [%l6+%g1],%f20 642 643 fand %f24,%f38,%f38 644 ldd [%l7+%g1],%f28 645 646 fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] 647 648 fsubd %f10,%f12,%f10 649 650 fsubd %f18,%f20,%f18 651 nop 652 653 fsubd %f26,%f28,%f26 654 nop 655 656! 
16-byte aligned 657 fmuld %f2,%f2,%f0 ! z = x * x 658 andcc %l0,1,%g0 659 bz,pn %icc,.case8 660! delay slot 661 fxor %f6,%f32,%f32 662 663 fmuld %f10,%f10,%f8 664 andcc %l1,1,%g0 665 bz,pn %icc,.case4 666! delay slot 667 fxor %f14,%f34,%f34 668 669 fmuld %f18,%f18,%f16 670 andcc %l2,1,%g0 671 bz,pn %icc,.case2 672! delay slot 673 fxor %f22,%f36,%f36 674 675 fmuld %f26,%f26,%f24 676 andcc %l3,1,%g0 677 bz,pn %icc,.case1 678! delay slot 679 fxor %f30,%f38,%f38 680 681!.case0: 682 fmuld %f0,qq3,%f6 ! cos(x0) 683 684 fmuld %f8,qq3,%f14 ! cos(x1) 685 686 fmuld %f16,qq3,%f22 ! cos(x2) 687 688 fmuld %f24,qq3,%f30 ! cos(x3) 689 690 faddd %f6,qq2,%f6 691 fmuld %f0,pp2,%f4 692 693 faddd %f14,qq2,%f14 694 fmuld %f8,pp2,%f12 695 696 faddd %f22,qq2,%f22 697 fmuld %f16,pp2,%f20 698 699 faddd %f30,qq2,%f30 700 fmuld %f24,pp2,%f28 701 702 fmuld %f0,%f6,%f6 703 faddd %f4,pp1,%f4 704 705 fmuld %f8,%f14,%f14 706 faddd %f12,pp1,%f12 707 708 fmuld %f16,%f22,%f22 709 faddd %f20,pp1,%f20 710 711 fmuld %f24,%f30,%f30 712 faddd %f28,pp1,%f28 713 714 faddd %f6,qq1,%f6 715 fmuld %f0,%f4,%f4 716 add %l4,%g1,%l4 717 718 faddd %f14,qq1,%f14 719 fmuld %f8,%f12,%f12 720 add %l5,%g1,%l5 721 722 faddd %f22,qq1,%f22 723 fmuld %f16,%f20,%f20 724 add %l6,%g1,%l6 725 726 faddd %f30,qq1,%f30 727 fmuld %f24,%f28,%f28 728 add %l7,%g1,%l7 729 730 fmuld %f2,%f4,%f4 731 732 fmuld %f10,%f12,%f12 733 734 fmuld %f18,%f20,%f20 735 736 fmuld %f26,%f28,%f28 737 738 fmuld %f0,%f6,%f6 739 faddd %f4,%f32,%f4 740 ldd [%l4+16],%f0 741 742 fmuld %f8,%f14,%f14 743 faddd %f12,%f34,%f12 744 ldd [%l5+16],%f8 745 746 fmuld %f16,%f22,%f22 747 faddd %f20,%f36,%f20 748 ldd [%l6+16],%f16 749 750 fmuld %f24,%f30,%f30 751 faddd %f28,%f38,%f28 752 ldd [%l7+16],%f24 753 754 fmuld %f0,%f6,%f6 755 faddd %f4,%f2,%f4 756 ldd [%l4+8],%f32 757 758 fmuld %f8,%f14,%f14 759 faddd %f12,%f10,%f12 760 ldd [%l5+8],%f34 761 762 fmuld %f16,%f22,%f22 763 faddd %f20,%f18,%f20 764 ldd [%l6+8],%f36 765 766 fmuld %f24,%f30,%f30 767 faddd %f28,%f26,%f28 
768 ldd [%l7+8],%f38 769 770 fmuld %f32,%f4,%f4 771 772 fmuld %f34,%f12,%f12 773 774 fmuld %f36,%f20,%f20 775 776 fmuld %f38,%f28,%f28 777 778 fsubd %f6,%f4,%f6 779 780 fsubd %f14,%f12,%f14 781 782 fsubd %f22,%f20,%f22 783 784 fsubd %f30,%f28,%f30 785 786 faddd %f6,%f0,%f6 787 788 faddd %f14,%f8,%f14 789 790 faddd %f22,%f16,%f22 791 792 faddd %f30,%f24,%f30 793 mov %l0,%l4 794 795 fnegd %f6,%f4 796 lda [%i1]%asi,%l0 ! preload next argument 797 798 fnegd %f14,%f12 799 lda [%i1]%asi,%f0 800 801 fnegd %f22,%f20 802 lda [%i1+4]%asi,%f3 803 804 fnegd %f30,%f28 805 andn %l0,%i5,%l0 806 add %i1,%i2,%i1 807 808 andcc %l4,2,%g0 809 fmovdnz %icc,%f4,%f6 810 st %f6,[%o0] 811 812 andcc %l1,2,%g0 813 fmovdnz %icc,%f12,%f14 814 st %f14,[%o1] 815 816 andcc %l2,2,%g0 817 fmovdnz %icc,%f20,%f22 818 st %f22,[%o2] 819 820 andcc %l3,2,%g0 821 fmovdnz %icc,%f28,%f30 822 st %f30,[%o3] 823 824 addcc %i0,-1,%i0 825 bg,pt %icc,.loop0 826! delay slot 827 st %f7,[%o0+4] 828 829 ba,pt %icc,.end 830! delay slot 831 nop 832 833 .align 16 834.case1: 835 fmuld %f24,pp3,%f30 ! sin(x3) 836 837 fmuld %f0,qq3,%f6 ! cos(x0) 838 839 fmuld %f8,qq3,%f14 ! cos(x1) 840 841 fmuld %f16,qq3,%f22 ! 
cos(x2) 842 843 faddd %f30,pp2,%f30 844 fmuld %f24,qq2,%f28 845 846 faddd %f6,qq2,%f6 847 fmuld %f0,pp2,%f4 848 849 faddd %f14,qq2,%f14 850 fmuld %f8,pp2,%f12 851 852 faddd %f22,qq2,%f22 853 fmuld %f16,pp2,%f20 854 855 fmuld %f24,%f30,%f30 856 faddd %f28,qq1,%f28 857 858 fmuld %f0,%f6,%f6 859 faddd %f4,pp1,%f4 860 861 fmuld %f8,%f14,%f14 862 faddd %f12,pp1,%f12 863 864 fmuld %f16,%f22,%f22 865 faddd %f20,pp1,%f20 866 867 faddd %f30,pp1,%f30 868 fmuld %f24,%f28,%f28 869 add %l7,%g1,%l7 870 871 faddd %f6,qq1,%f6 872 fmuld %f0,%f4,%f4 873 add %l4,%g1,%l4 874 875 faddd %f14,qq1,%f14 876 fmuld %f8,%f12,%f12 877 add %l5,%g1,%l5 878 879 faddd %f22,qq1,%f22 880 fmuld %f16,%f20,%f20 881 add %l6,%g1,%l6 882 883 fmuld %f24,%f30,%f30 884 885 fmuld %f2,%f4,%f4 886 887 fmuld %f10,%f12,%f12 888 889 fmuld %f18,%f20,%f20 890 891 fmuld %f26,%f30,%f30 892 ldd [%l7+8],%f24 893 894 fmuld %f0,%f6,%f6 895 faddd %f4,%f32,%f4 896 ldd [%l4+16],%f0 897 898 fmuld %f8,%f14,%f14 899 faddd %f12,%f34,%f12 900 ldd [%l5+16],%f8 901 902 fmuld %f16,%f22,%f22 903 faddd %f20,%f36,%f20 904 ldd [%l6+16],%f16 905 906 fmuld %f24,%f28,%f28 907 faddd %f38,%f30,%f30 908 909 fmuld %f0,%f6,%f6 910 faddd %f4,%f2,%f4 911 ldd [%l4+8],%f32 912 913 fmuld %f8,%f14,%f14 914 faddd %f12,%f10,%f12 915 ldd [%l5+8],%f34 916 917 fmuld %f16,%f22,%f22 918 faddd %f20,%f18,%f20 919 ldd [%l6+8],%f36 920 921 faddd %f26,%f30,%f30 922 ldd [%l7+16],%f38 923 924 fmuld %f32,%f4,%f4 925 926 fmuld %f34,%f12,%f12 927 928 fmuld %f36,%f20,%f20 929 930 fmuld %f38,%f30,%f30 931 932 fsubd %f6,%f4,%f6 933 934 fsubd %f14,%f12,%f14 935 936 fsubd %f22,%f20,%f22 937 938 faddd %f30,%f28,%f30 939 940 faddd %f6,%f0,%f6 941 942 faddd %f14,%f8,%f14 943 944 faddd %f22,%f16,%f22 945 946 faddd %f30,%f24,%f30 947 mov %l0,%l4 948 949 fnegd %f6,%f4 950 lda [%i1]%asi,%l0 ! 
preload next argument 951 952 fnegd %f14,%f12 953 lda [%i1]%asi,%f0 954 955 fnegd %f22,%f20 956 lda [%i1+4]%asi,%f3 957 958 fnegd %f30,%f28 959 andn %l0,%i5,%l0 960 add %i1,%i2,%i1 961 962 andcc %l4,2,%g0 963 fmovdnz %icc,%f4,%f6 964 st %f6,[%o0] 965 966 andcc %l1,2,%g0 967 fmovdnz %icc,%f12,%f14 968 st %f14,[%o1] 969 970 andcc %l2,2,%g0 971 fmovdnz %icc,%f20,%f22 972 st %f22,[%o2] 973 974 andcc %l3,2,%g0 975 fmovdnz %icc,%f28,%f30 976 st %f30,[%o3] 977 978 addcc %i0,-1,%i0 979 bg,pt %icc,.loop0 980! delay slot 981 st %f7,[%o0+4] 982 983 ba,pt %icc,.end 984! delay slot 985 nop 986 987 .align 16 988.case2: 989 fmuld %f26,%f26,%f24 990 andcc %l3,1,%g0 991 bz,pn %icc,.case3 992! delay slot 993 fxor %f30,%f38,%f38 994 995 fmuld %f16,pp3,%f22 ! sin(x2) 996 997 fmuld %f0,qq3,%f6 ! cos(x0) 998 999 fmuld %f8,qq3,%f14 ! cos(x1) 1000 1001 faddd %f22,pp2,%f22 1002 fmuld %f16,qq2,%f20 1003 1004 fmuld %f24,qq3,%f30 ! cos(x3) 1005 1006 faddd %f6,qq2,%f6 1007 fmuld %f0,pp2,%f4 1008 1009 faddd %f14,qq2,%f14 1010 fmuld %f8,pp2,%f12 1011 1012 fmuld %f16,%f22,%f22 1013 faddd %f20,qq1,%f20 1014 1015 faddd %f30,qq2,%f30 1016 fmuld %f24,pp2,%f28 1017 1018 fmuld %f0,%f6,%f6 1019 faddd %f4,pp1,%f4 1020 1021 fmuld %f8,%f14,%f14 1022 faddd %f12,pp1,%f12 1023 1024 faddd %f22,pp1,%f22 1025 fmuld %f16,%f20,%f20 1026 add %l6,%g1,%l6 1027 1028 fmuld %f24,%f30,%f30 1029 faddd %f28,pp1,%f28 1030 1031 faddd %f6,qq1,%f6 1032 fmuld %f0,%f4,%f4 1033 add %l4,%g1,%l4 1034 1035 faddd %f14,qq1,%f14 1036 fmuld %f8,%f12,%f12 1037 add %l5,%g1,%l5 1038 1039 fmuld %f16,%f22,%f22 1040 1041 faddd %f30,qq1,%f30 1042 fmuld %f24,%f28,%f28 1043 add %l7,%g1,%l7 1044 1045 fmuld %f2,%f4,%f4 1046 1047 fmuld %f10,%f12,%f12 1048 1049 fmuld %f18,%f22,%f22 1050 ldd [%l6+8],%f16 1051 1052 fmuld %f26,%f28,%f28 1053 1054 fmuld %f0,%f6,%f6 1055 faddd %f4,%f32,%f4 1056 ldd [%l4+16],%f0 1057 1058 fmuld %f8,%f14,%f14 1059 faddd %f12,%f34,%f12 1060 ldd [%l5+16],%f8 1061 1062 fmuld %f16,%f20,%f20 1063 faddd %f36,%f22,%f22 1064 1065 
fmuld %f24,%f30,%f30 1066 faddd %f28,%f38,%f28 1067 ldd [%l7+16],%f24 1068 1069 fmuld %f0,%f6,%f6 1070 faddd %f4,%f2,%f4 1071 ldd [%l4+8],%f32 1072 1073 fmuld %f8,%f14,%f14 1074 faddd %f12,%f10,%f12 1075 ldd [%l5+8],%f34 1076 1077 faddd %f18,%f22,%f22 1078 ldd [%l6+16],%f36 1079 1080 fmuld %f24,%f30,%f30 1081 faddd %f28,%f26,%f28 1082 ldd [%l7+8],%f38 1083 1084 fmuld %f32,%f4,%f4 1085 1086 fmuld %f34,%f12,%f12 1087 1088 fmuld %f36,%f22,%f22 1089 1090 fmuld %f38,%f28,%f28 1091 1092 fsubd %f6,%f4,%f6 1093 1094 fsubd %f14,%f12,%f14 1095 1096 faddd %f22,%f20,%f22 1097 1098 fsubd %f30,%f28,%f30 1099 1100 faddd %f6,%f0,%f6 1101 1102 faddd %f14,%f8,%f14 1103 1104 faddd %f22,%f16,%f22 1105 1106 faddd %f30,%f24,%f30 1107 mov %l0,%l4 1108 1109 fnegd %f6,%f4 1110 lda [%i1]%asi,%l0 ! preload next argument 1111 1112 fnegd %f14,%f12 1113 lda [%i1]%asi,%f0 1114 1115 fnegd %f22,%f20 1116 lda [%i1+4]%asi,%f3 1117 1118 fnegd %f30,%f28 1119 andn %l0,%i5,%l0 1120 add %i1,%i2,%i1 1121 1122 andcc %l4,2,%g0 1123 fmovdnz %icc,%f4,%f6 1124 st %f6,[%o0] 1125 1126 andcc %l1,2,%g0 1127 fmovdnz %icc,%f12,%f14 1128 st %f14,[%o1] 1129 1130 andcc %l2,2,%g0 1131 fmovdnz %icc,%f20,%f22 1132 st %f22,[%o2] 1133 1134 andcc %l3,2,%g0 1135 fmovdnz %icc,%f28,%f30 1136 st %f30,[%o3] 1137 1138 addcc %i0,-1,%i0 1139 bg,pt %icc,.loop0 1140! delay slot 1141 st %f7,[%o0+4] 1142 1143 ba,pt %icc,.end 1144! delay slot 1145 nop 1146 1147 .align 16 1148.case3: 1149 fmuld %f16,pp3,%f22 ! sin(x2) 1150 1151 fmuld %f24,pp3,%f30 ! sin(x3) 1152 1153 fmuld %f0,qq3,%f6 ! cos(x0) 1154 1155 fmuld %f8,qq3,%f14 ! 
cos(x1) 1156 1157 faddd %f22,pp2,%f22 1158 fmuld %f16,qq2,%f20 1159 1160 faddd %f30,pp2,%f30 1161 fmuld %f24,qq2,%f28 1162 1163 faddd %f6,qq2,%f6 1164 fmuld %f0,pp2,%f4 1165 1166 faddd %f14,qq2,%f14 1167 fmuld %f8,pp2,%f12 1168 1169 fmuld %f16,%f22,%f22 1170 faddd %f20,qq1,%f20 1171 1172 fmuld %f24,%f30,%f30 1173 faddd %f28,qq1,%f28 1174 1175 fmuld %f0,%f6,%f6 1176 faddd %f4,pp1,%f4 1177 1178 fmuld %f8,%f14,%f14 1179 faddd %f12,pp1,%f12 1180 1181 faddd %f22,pp1,%f22 1182 fmuld %f16,%f20,%f20 1183 add %l6,%g1,%l6 1184 1185 faddd %f30,pp1,%f30 1186 fmuld %f24,%f28,%f28 1187 add %l7,%g1,%l7 1188 1189 faddd %f6,qq1,%f6 1190 fmuld %f0,%f4,%f4 1191 add %l4,%g1,%l4 1192 1193 faddd %f14,qq1,%f14 1194 fmuld %f8,%f12,%f12 1195 add %l5,%g1,%l5 1196 1197 fmuld %f16,%f22,%f22 1198 1199 fmuld %f24,%f30,%f30 1200 1201 fmuld %f2,%f4,%f4 1202 1203 fmuld %f10,%f12,%f12 1204 1205 fmuld %f18,%f22,%f22 1206 ldd [%l6+8],%f16 1207 1208 fmuld %f26,%f30,%f30 1209 ldd [%l7+8],%f24 1210 1211 fmuld %f0,%f6,%f6 1212 faddd %f4,%f32,%f4 1213 ldd [%l4+16],%f0 1214 1215 fmuld %f8,%f14,%f14 1216 faddd %f12,%f34,%f12 1217 ldd [%l5+16],%f8 1218 1219 fmuld %f16,%f20,%f20 1220 faddd %f36,%f22,%f22 1221 1222 fmuld %f24,%f28,%f28 1223 faddd %f38,%f30,%f30 1224 1225 fmuld %f0,%f6,%f6 1226 faddd %f4,%f2,%f4 1227 ldd [%l4+8],%f32 1228 1229 fmuld %f8,%f14,%f14 1230 faddd %f12,%f10,%f12 1231 ldd [%l5+8],%f34 1232 1233 faddd %f18,%f22,%f22 1234 ldd [%l6+16],%f36 1235 1236 faddd %f26,%f30,%f30 1237 ldd [%l7+16],%f38 1238 1239 fmuld %f32,%f4,%f4 1240 1241 fmuld %f34,%f12,%f12 1242 1243 fmuld %f36,%f22,%f22 1244 1245 fmuld %f38,%f30,%f30 1246 1247 fsubd %f6,%f4,%f6 1248 1249 fsubd %f14,%f12,%f14 1250 1251 faddd %f22,%f20,%f22 1252 1253 faddd %f30,%f28,%f30 1254 1255 faddd %f6,%f0,%f6 1256 1257 faddd %f14,%f8,%f14 1258 1259 faddd %f22,%f16,%f22 1260 1261 faddd %f30,%f24,%f30 1262 mov %l0,%l4 1263 1264 fnegd %f6,%f4 1265 lda [%i1]%asi,%l0 ! 
preload next argument 1266 1267 fnegd %f14,%f12 1268 lda [%i1]%asi,%f0 1269 1270 fnegd %f22,%f20 1271 lda [%i1+4]%asi,%f3 1272 1273 fnegd %f30,%f28 1274 andn %l0,%i5,%l0 1275 add %i1,%i2,%i1 1276 1277 andcc %l4,2,%g0 1278 fmovdnz %icc,%f4,%f6 1279 st %f6,[%o0] 1280 1281 andcc %l1,2,%g0 1282 fmovdnz %icc,%f12,%f14 1283 st %f14,[%o1] 1284 1285 andcc %l2,2,%g0 1286 fmovdnz %icc,%f20,%f22 1287 st %f22,[%o2] 1288 1289 andcc %l3,2,%g0 1290 fmovdnz %icc,%f28,%f30 1291 st %f30,[%o3] 1292 1293 addcc %i0,-1,%i0 1294 bg,pt %icc,.loop0 1295! delay slot 1296 st %f7,[%o0+4] 1297 1298 ba,pt %icc,.end 1299! delay slot 1300 nop 1301 1302 .align 16 1303.case4: 1304 fmuld %f18,%f18,%f16 1305 andcc %l2,1,%g0 1306 bz,pn %icc,.case6 1307! delay slot 1308 fxor %f22,%f36,%f36 1309 1310 fmuld %f26,%f26,%f24 1311 andcc %l3,1,%g0 1312 bz,pn %icc,.case5 1313! delay slot 1314 fxor %f30,%f38,%f38 1315 1316 fmuld %f8,pp3,%f14 ! sin(x1) 1317 1318 fmuld %f0,qq3,%f6 ! cos(x0) 1319 1320 faddd %f14,pp2,%f14 1321 fmuld %f8,qq2,%f12 1322 1323 fmuld %f16,qq3,%f22 ! cos(x2) 1324 1325 fmuld %f24,qq3,%f30 ! 
cos(x3) 1326 1327 faddd %f6,qq2,%f6 1328 fmuld %f0,pp2,%f4 1329 1330 fmuld %f8,%f14,%f14 1331 faddd %f12,qq1,%f12 1332 1333 faddd %f22,qq2,%f22 1334 fmuld %f16,pp2,%f20 1335 1336 faddd %f30,qq2,%f30 1337 fmuld %f24,pp2,%f28 1338 1339 fmuld %f0,%f6,%f6 1340 faddd %f4,pp1,%f4 1341 1342 faddd %f14,pp1,%f14 1343 fmuld %f8,%f12,%f12 1344 add %l5,%g1,%l5 1345 1346 fmuld %f16,%f22,%f22 1347 faddd %f20,pp1,%f20 1348 1349 fmuld %f24,%f30,%f30 1350 faddd %f28,pp1,%f28 1351 1352 faddd %f6,qq1,%f6 1353 fmuld %f0,%f4,%f4 1354 add %l4,%g1,%l4 1355 1356 fmuld %f8,%f14,%f14 1357 1358 faddd %f22,qq1,%f22 1359 fmuld %f16,%f20,%f20 1360 add %l6,%g1,%l6 1361 1362 faddd %f30,qq1,%f30 1363 fmuld %f24,%f28,%f28 1364 add %l7,%g1,%l7 1365 1366 fmuld %f2,%f4,%f4 1367 1368 fmuld %f10,%f14,%f14 1369 ldd [%l5+8],%f8 1370 1371 fmuld %f18,%f20,%f20 1372 1373 fmuld %f26,%f28,%f28 1374 1375 fmuld %f0,%f6,%f6 1376 faddd %f4,%f32,%f4 1377 ldd [%l4+16],%f0 1378 1379 fmuld %f8,%f12,%f12 1380 faddd %f34,%f14,%f14 1381 1382 fmuld %f16,%f22,%f22 1383 faddd %f20,%f36,%f20 1384 ldd [%l6+16],%f16 1385 1386 fmuld %f24,%f30,%f30 1387 faddd %f28,%f38,%f28 1388 ldd [%l7+16],%f24 1389 1390 fmuld %f0,%f6,%f6 1391 faddd %f4,%f2,%f4 1392 ldd [%l4+8],%f32 1393 1394 faddd %f10,%f14,%f14 1395 ldd [%l5+16],%f34 1396 1397 fmuld %f16,%f22,%f22 1398 faddd %f20,%f18,%f20 1399 ldd [%l6+8],%f36 1400 1401 fmuld %f24,%f30,%f30 1402 faddd %f28,%f26,%f28 1403 ldd [%l7+8],%f38 1404 1405 fmuld %f32,%f4,%f4 1406 1407 fmuld %f34,%f14,%f14 1408 1409 fmuld %f36,%f20,%f20 1410 1411 fmuld %f38,%f28,%f28 1412 1413 fsubd %f6,%f4,%f6 1414 1415 faddd %f14,%f12,%f14 1416 1417 fsubd %f22,%f20,%f22 1418 1419 fsubd %f30,%f28,%f30 1420 1421 faddd %f6,%f0,%f6 1422 1423 faddd %f14,%f8,%f14 1424 1425 faddd %f22,%f16,%f22 1426 1427 faddd %f30,%f24,%f30 1428 mov %l0,%l4 1429 1430 fnegd %f6,%f4 1431 lda [%i1]%asi,%l0 ! 
preload next argument 1432 1433 fnegd %f14,%f12 1434 lda [%i1]%asi,%f0 1435 1436 fnegd %f22,%f20 1437 lda [%i1+4]%asi,%f3 1438 1439 fnegd %f30,%f28 1440 andn %l0,%i5,%l0 1441 add %i1,%i2,%i1 1442 1443 andcc %l4,2,%g0 1444 fmovdnz %icc,%f4,%f6 1445 st %f6,[%o0] 1446 1447 andcc %l1,2,%g0 1448 fmovdnz %icc,%f12,%f14 1449 st %f14,[%o1] 1450 1451 andcc %l2,2,%g0 1452 fmovdnz %icc,%f20,%f22 1453 st %f22,[%o2] 1454 1455 andcc %l3,2,%g0 1456 fmovdnz %icc,%f28,%f30 1457 st %f30,[%o3] 1458 1459 addcc %i0,-1,%i0 1460 bg,pt %icc,.loop0 1461! delay slot 1462 st %f7,[%o0+4] 1463 1464 ba,pt %icc,.end 1465! delay slot 1466 nop 1467 1468 .align 16 1469.case5: 1470 fmuld %f8,pp3,%f14 ! sin(x1) 1471 1472 fmuld %f24,pp3,%f30 ! sin(x3) 1473 1474 fmuld %f0,qq3,%f6 ! cos(x0) 1475 1476 faddd %f14,pp2,%f14 1477 fmuld %f8,qq2,%f12 1478 1479 fmuld %f16,qq3,%f22 ! cos(x2) 1480 1481 faddd %f30,pp2,%f30 1482 fmuld %f24,qq2,%f28 1483 1484 faddd %f6,qq2,%f6 1485 fmuld %f0,pp2,%f4 1486 1487 fmuld %f8,%f14,%f14 1488 faddd %f12,qq1,%f12 1489 1490 faddd %f22,qq2,%f22 1491 fmuld %f16,pp2,%f20 1492 1493 fmuld %f24,%f30,%f30 1494 faddd %f28,qq1,%f28 1495 1496 fmuld %f0,%f6,%f6 1497 faddd %f4,pp1,%f4 1498 1499 faddd %f14,pp1,%f14 1500 fmuld %f8,%f12,%f12 1501 add %l5,%g1,%l5 1502 1503 fmuld %f16,%f22,%f22 1504 faddd %f20,pp1,%f20 1505 1506 faddd %f30,pp1,%f30 1507 fmuld %f24,%f28,%f28 1508 add %l7,%g1,%l7 1509 1510 faddd %f6,qq1,%f6 1511 fmuld %f0,%f4,%f4 1512 add %l4,%g1,%l4 1513 1514 fmuld %f8,%f14,%f14 1515 1516 faddd %f22,qq1,%f22 1517 fmuld %f16,%f20,%f20 1518 add %l6,%g1,%l6 1519 1520 fmuld %f24,%f30,%f30 1521 1522 fmuld %f2,%f4,%f4 1523 1524 fmuld %f10,%f14,%f14 1525 ldd [%l5+8],%f8 1526 1527 fmuld %f18,%f20,%f20 1528 1529 fmuld %f26,%f30,%f30 1530 ldd [%l7+8],%f24 1531 1532 fmuld %f0,%f6,%f6 1533 faddd %f4,%f32,%f4 1534 ldd [%l4+16],%f0 1535 1536 fmuld %f8,%f12,%f12 1537 faddd %f34,%f14,%f14 1538 1539 fmuld %f16,%f22,%f22 1540 faddd %f20,%f36,%f20 1541 ldd [%l6+16],%f16 1542 1543 fmuld 
%f24,%f28,%f28 1544 faddd %f38,%f30,%f30 1545 1546 fmuld %f0,%f6,%f6 1547 faddd %f4,%f2,%f4 1548 ldd [%l4+8],%f32 1549 1550 faddd %f10,%f14,%f14 1551 ldd [%l5+16],%f34 1552 1553 fmuld %f16,%f22,%f22 1554 faddd %f20,%f18,%f20 1555 ldd [%l6+8],%f36 1556 1557 faddd %f26,%f30,%f30 1558 ldd [%l7+16],%f38 1559 1560 fmuld %f32,%f4,%f4 1561 1562 fmuld %f34,%f14,%f14 1563 1564 fmuld %f36,%f20,%f20 1565 1566 fmuld %f38,%f30,%f30 1567 1568 fsubd %f6,%f4,%f6 1569 1570 faddd %f14,%f12,%f14 1571 1572 fsubd %f22,%f20,%f22 1573 1574 faddd %f30,%f28,%f30 1575 1576 faddd %f6,%f0,%f6 1577 1578 faddd %f14,%f8,%f14 1579 1580 faddd %f22,%f16,%f22 1581 1582 faddd %f30,%f24,%f30 1583 mov %l0,%l4 1584 1585 fnegd %f6,%f4 1586 lda [%i1]%asi,%l0 ! preload next argument 1587 1588 fnegd %f14,%f12 1589 lda [%i1]%asi,%f0 1590 1591 fnegd %f22,%f20 1592 lda [%i1+4]%asi,%f3 1593 1594 fnegd %f30,%f28 1595 andn %l0,%i5,%l0 1596 add %i1,%i2,%i1 1597 1598 andcc %l4,2,%g0 1599 fmovdnz %icc,%f4,%f6 1600 st %f6,[%o0] 1601 1602 andcc %l1,2,%g0 1603 fmovdnz %icc,%f12,%f14 1604 st %f14,[%o1] 1605 1606 andcc %l2,2,%g0 1607 fmovdnz %icc,%f20,%f22 1608 st %f22,[%o2] 1609 1610 andcc %l3,2,%g0 1611 fmovdnz %icc,%f28,%f30 1612 st %f30,[%o3] 1613 1614 addcc %i0,-1,%i0 1615 bg,pt %icc,.loop0 1616! delay slot 1617 st %f7,[%o0+4] 1618 1619 ba,pt %icc,.end 1620! delay slot 1621 nop 1622 1623 .align 16 1624.case6: 1625 fmuld %f26,%f26,%f24 1626 andcc %l3,1,%g0 1627 bz,pn %icc,.case7 1628! delay slot 1629 fxor %f30,%f38,%f38 1630 1631 fmuld %f8,pp3,%f14 ! sin(x1) 1632 1633 fmuld %f16,pp3,%f22 ! sin(x2) 1634 1635 fmuld %f0,qq3,%f6 ! cos(x0) 1636 1637 faddd %f14,pp2,%f14 1638 fmuld %f8,qq2,%f12 1639 1640 faddd %f22,pp2,%f22 1641 fmuld %f16,qq2,%f20 1642 1643 fmuld %f24,qq3,%f30 ! 
cos(x3) 1644 1645 faddd %f6,qq2,%f6 1646 fmuld %f0,pp2,%f4 1647 1648 fmuld %f8,%f14,%f14 1649 faddd %f12,qq1,%f12 1650 1651 fmuld %f16,%f22,%f22 1652 faddd %f20,qq1,%f20 1653 1654 faddd %f30,qq2,%f30 1655 fmuld %f24,pp2,%f28 1656 1657 fmuld %f0,%f6,%f6 1658 faddd %f4,pp1,%f4 1659 1660 faddd %f14,pp1,%f14 1661 fmuld %f8,%f12,%f12 1662 add %l5,%g1,%l5 1663 1664 faddd %f22,pp1,%f22 1665 fmuld %f16,%f20,%f20 1666 add %l6,%g1,%l6 1667 1668 fmuld %f24,%f30,%f30 1669 faddd %f28,pp1,%f28 1670 1671 faddd %f6,qq1,%f6 1672 fmuld %f0,%f4,%f4 1673 add %l4,%g1,%l4 1674 1675 fmuld %f8,%f14,%f14 1676 1677 fmuld %f16,%f22,%f22 1678 1679 faddd %f30,qq1,%f30 1680 fmuld %f24,%f28,%f28 1681 add %l7,%g1,%l7 1682 1683 fmuld %f2,%f4,%f4 1684 1685 fmuld %f10,%f14,%f14 1686 ldd [%l5+8],%f8 1687 1688 fmuld %f18,%f22,%f22 1689 ldd [%l6+8],%f16 1690 1691 fmuld %f26,%f28,%f28 1692 1693 fmuld %f0,%f6,%f6 1694 faddd %f4,%f32,%f4 1695 ldd [%l4+16],%f0 1696 1697 fmuld %f8,%f12,%f12 1698 faddd %f34,%f14,%f14 1699 1700 fmuld %f16,%f20,%f20 1701 faddd %f36,%f22,%f22 1702 1703 fmuld %f24,%f30,%f30 1704 faddd %f28,%f38,%f28 1705 ldd [%l7+16],%f24 1706 1707 fmuld %f0,%f6,%f6 1708 faddd %f4,%f2,%f4 1709 ldd [%l4+8],%f32 1710 1711 faddd %f10,%f14,%f14 1712 ldd [%l5+16],%f34 1713 1714 faddd %f18,%f22,%f22 1715 ldd [%l6+16],%f36 1716 1717 fmuld %f24,%f30,%f30 1718 faddd %f28,%f26,%f28 1719 ldd [%l7+8],%f38 1720 1721 fmuld %f32,%f4,%f4 1722 1723 fmuld %f34,%f14,%f14 1724 1725 fmuld %f36,%f22,%f22 1726 1727 fmuld %f38,%f28,%f28 1728 1729 fsubd %f6,%f4,%f6 1730 1731 faddd %f14,%f12,%f14 1732 1733 faddd %f22,%f20,%f22 1734 1735 fsubd %f30,%f28,%f30 1736 1737 faddd %f6,%f0,%f6 1738 1739 faddd %f14,%f8,%f14 1740 1741 faddd %f22,%f16,%f22 1742 1743 faddd %f30,%f24,%f30 1744 mov %l0,%l4 1745 1746 fnegd %f6,%f4 1747 lda [%i1]%asi,%l0 ! 
preload next argument 1748 1749 fnegd %f14,%f12 1750 lda [%i1]%asi,%f0 1751 1752 fnegd %f22,%f20 1753 lda [%i1+4]%asi,%f3 1754 1755 fnegd %f30,%f28 1756 andn %l0,%i5,%l0 1757 add %i1,%i2,%i1 1758 1759 andcc %l4,2,%g0 1760 fmovdnz %icc,%f4,%f6 1761 st %f6,[%o0] 1762 1763 andcc %l1,2,%g0 1764 fmovdnz %icc,%f12,%f14 1765 st %f14,[%o1] 1766 1767 andcc %l2,2,%g0 1768 fmovdnz %icc,%f20,%f22 1769 st %f22,[%o2] 1770 1771 andcc %l3,2,%g0 1772 fmovdnz %icc,%f28,%f30 1773 st %f30,[%o3] 1774 1775 addcc %i0,-1,%i0 1776 bg,pt %icc,.loop0 1777! delay slot 1778 st %f7,[%o0+4] 1779 1780 ba,pt %icc,.end 1781! delay slot 1782 nop 1783 1784 .align 16 1785.case7: 1786 fmuld %f8,pp3,%f14 ! sin(x1) 1787 1788 fmuld %f16,pp3,%f22 ! sin(x2) 1789 1790 fmuld %f24,pp3,%f30 ! sin(x3) 1791 1792 fmuld %f0,qq3,%f6 ! cos(x0) 1793 1794 faddd %f14,pp2,%f14 1795 fmuld %f8,qq2,%f12 1796 1797 faddd %f22,pp2,%f22 1798 fmuld %f16,qq2,%f20 1799 1800 faddd %f30,pp2,%f30 1801 fmuld %f24,qq2,%f28 1802 1803 faddd %f6,qq2,%f6 1804 fmuld %f0,pp2,%f4 1805 1806 fmuld %f8,%f14,%f14 1807 faddd %f12,qq1,%f12 1808 1809 fmuld %f16,%f22,%f22 1810 faddd %f20,qq1,%f20 1811 1812 fmuld %f24,%f30,%f30 1813 faddd %f28,qq1,%f28 1814 1815 fmuld %f0,%f6,%f6 1816 faddd %f4,pp1,%f4 1817 1818 faddd %f14,pp1,%f14 1819 fmuld %f8,%f12,%f12 1820 add %l5,%g1,%l5 1821 1822 faddd %f22,pp1,%f22 1823 fmuld %f16,%f20,%f20 1824 add %l6,%g1,%l6 1825 1826 faddd %f30,pp1,%f30 1827 fmuld %f24,%f28,%f28 1828 add %l7,%g1,%l7 1829 1830 faddd %f6,qq1,%f6 1831 fmuld %f0,%f4,%f4 1832 add %l4,%g1,%l4 1833 1834 fmuld %f8,%f14,%f14 1835 1836 fmuld %f16,%f22,%f22 1837 1838 fmuld %f24,%f30,%f30 1839 1840 fmuld %f2,%f4,%f4 1841 1842 fmuld %f10,%f14,%f14 1843 ldd [%l5+8],%f8 1844 1845 fmuld %f18,%f22,%f22 1846 ldd [%l6+8],%f16 1847 1848 fmuld %f26,%f30,%f30 1849 ldd [%l7+8],%f24 1850 1851 fmuld %f0,%f6,%f6 1852 faddd %f4,%f32,%f4 1853 ldd [%l4+16],%f0 1854 1855 fmuld %f8,%f12,%f12 1856 faddd %f34,%f14,%f14 1857 1858 fmuld %f16,%f20,%f20 1859 faddd 
%f36,%f22,%f22 1860 1861 fmuld %f24,%f28,%f28 1862 faddd %f38,%f30,%f30 1863 1864 fmuld %f0,%f6,%f6 1865 faddd %f4,%f2,%f4 1866 ldd [%l4+8],%f32 1867 1868 faddd %f10,%f14,%f14 1869 ldd [%l5+16],%f34 1870 1871 faddd %f18,%f22,%f22 1872 ldd [%l6+16],%f36 1873 1874 faddd %f26,%f30,%f30 1875 ldd [%l7+16],%f38 1876 1877 fmuld %f32,%f4,%f4 1878 1879 fmuld %f34,%f14,%f14 1880 1881 fmuld %f36,%f22,%f22 1882 1883 fmuld %f38,%f30,%f30 1884 1885 fsubd %f6,%f4,%f6 1886 1887 faddd %f14,%f12,%f14 1888 1889 faddd %f22,%f20,%f22 1890 1891 faddd %f30,%f28,%f30 1892 1893 faddd %f6,%f0,%f6 1894 1895 faddd %f14,%f8,%f14 1896 1897 faddd %f22,%f16,%f22 1898 1899 faddd %f30,%f24,%f30 1900 mov %l0,%l4 1901 1902 fnegd %f6,%f4 1903 lda [%i1]%asi,%l0 ! preload next argument 1904 1905 fnegd %f14,%f12 1906 lda [%i1]%asi,%f0 1907 1908 fnegd %f22,%f20 1909 lda [%i1+4]%asi,%f3 1910 1911 fnegd %f30,%f28 1912 andn %l0,%i5,%l0 1913 add %i1,%i2,%i1 1914 1915 andcc %l4,2,%g0 1916 fmovdnz %icc,%f4,%f6 1917 st %f6,[%o0] 1918 1919 andcc %l1,2,%g0 1920 fmovdnz %icc,%f12,%f14 1921 st %f14,[%o1] 1922 1923 andcc %l2,2,%g0 1924 fmovdnz %icc,%f20,%f22 1925 st %f22,[%o2] 1926 1927 andcc %l3,2,%g0 1928 fmovdnz %icc,%f28,%f30 1929 st %f30,[%o3] 1930 1931 addcc %i0,-1,%i0 1932 bg,pt %icc,.loop0 1933! delay slot 1934 st %f7,[%o0+4] 1935 1936 ba,pt %icc,.end 1937! delay slot 1938 nop 1939 1940 .align 16 1941.case8: 1942 fmuld %f10,%f10,%f8 1943 andcc %l1,1,%g0 1944 bz,pn %icc,.case12 1945! delay slot 1946 fxor %f14,%f34,%f34 1947 1948 fmuld %f18,%f18,%f16 1949 andcc %l2,1,%g0 1950 bz,pn %icc,.case10 1951! delay slot 1952 fxor %f22,%f36,%f36 1953 1954 fmuld %f26,%f26,%f24 1955 andcc %l3,1,%g0 1956 bz,pn %icc,.case9 1957! delay slot 1958 fxor %f30,%f38,%f38 1959 1960 fmuld %f0,pp3,%f6 ! sin(x0) 1961 1962 faddd %f6,pp2,%f6 1963 fmuld %f0,qq2,%f4 1964 1965 fmuld %f8,qq3,%f14 ! cos(x1) 1966 1967 fmuld %f16,qq3,%f22 ! cos(x2) 1968 1969 fmuld %f24,qq3,%f30 ! 
cos(x3) 1970 1971 fmuld %f0,%f6,%f6 1972 faddd %f4,qq1,%f4 1973 1974 faddd %f14,qq2,%f14 1975 fmuld %f8,pp2,%f12 1976 1977 faddd %f22,qq2,%f22 1978 fmuld %f16,pp2,%f20 1979 1980 faddd %f30,qq2,%f30 1981 fmuld %f24,pp2,%f28 1982 1983 faddd %f6,pp1,%f6 1984 fmuld %f0,%f4,%f4 1985 add %l4,%g1,%l4 1986 1987 fmuld %f8,%f14,%f14 1988 faddd %f12,pp1,%f12 1989 1990 fmuld %f16,%f22,%f22 1991 faddd %f20,pp1,%f20 1992 1993 fmuld %f24,%f30,%f30 1994 faddd %f28,pp1,%f28 1995 1996 fmuld %f0,%f6,%f6 1997 1998 faddd %f14,qq1,%f14 1999 fmuld %f8,%f12,%f12 2000 add %l5,%g1,%l5 2001 2002 faddd %f22,qq1,%f22 2003 fmuld %f16,%f20,%f20 2004 add %l6,%g1,%l6 2005 2006 faddd %f30,qq1,%f30 2007 fmuld %f24,%f28,%f28 2008 add %l7,%g1,%l7 2009 2010 fmuld %f2,%f6,%f6 2011 ldd [%l4+8],%f0 2012 2013 fmuld %f10,%f12,%f12 2014 2015 fmuld %f18,%f20,%f20 2016 2017 fmuld %f26,%f28,%f28 2018 2019 fmuld %f0,%f4,%f4 2020 faddd %f32,%f6,%f6 2021 2022 fmuld %f8,%f14,%f14 2023 faddd %f12,%f34,%f12 2024 ldd [%l5+16],%f8 2025 2026 fmuld %f16,%f22,%f22 2027 faddd %f20,%f36,%f20 2028 ldd [%l6+16],%f16 2029 2030 fmuld %f24,%f30,%f30 2031 faddd %f28,%f38,%f28 2032 ldd [%l7+16],%f24 2033 2034 faddd %f2,%f6,%f6 2035 ldd [%l4+16],%f32 2036 2037 fmuld %f8,%f14,%f14 2038 faddd %f12,%f10,%f12 2039 ldd [%l5+8],%f34 2040 2041 fmuld %f16,%f22,%f22 2042 faddd %f20,%f18,%f20 2043 ldd [%l6+8],%f36 2044 2045 fmuld %f24,%f30,%f30 2046 faddd %f28,%f26,%f28 2047 ldd [%l7+8],%f38 2048 2049 fmuld %f32,%f6,%f6 2050 2051 fmuld %f34,%f12,%f12 2052 2053 fmuld %f36,%f20,%f20 2054 2055 fmuld %f38,%f28,%f28 2056 2057 faddd %f6,%f4,%f6 2058 2059 fsubd %f14,%f12,%f14 2060 2061 fsubd %f22,%f20,%f22 2062 2063 fsubd %f30,%f28,%f30 2064 2065 faddd %f6,%f0,%f6 2066 2067 faddd %f14,%f8,%f14 2068 2069 faddd %f22,%f16,%f22 2070 2071 faddd %f30,%f24,%f30 2072 mov %l0,%l4 2073 2074 fnegd %f6,%f4 2075 lda [%i1]%asi,%l0 ! 
preload next argument 2076 2077 fnegd %f14,%f12 2078 lda [%i1]%asi,%f0 2079 2080 fnegd %f22,%f20 2081 lda [%i1+4]%asi,%f3 2082 2083 fnegd %f30,%f28 2084 andn %l0,%i5,%l0 2085 add %i1,%i2,%i1 2086 2087 andcc %l4,2,%g0 2088 fmovdnz %icc,%f4,%f6 2089 st %f6,[%o0] 2090 2091 andcc %l1,2,%g0 2092 fmovdnz %icc,%f12,%f14 2093 st %f14,[%o1] 2094 2095 andcc %l2,2,%g0 2096 fmovdnz %icc,%f20,%f22 2097 st %f22,[%o2] 2098 2099 andcc %l3,2,%g0 2100 fmovdnz %icc,%f28,%f30 2101 st %f30,[%o3] 2102 2103 addcc %i0,-1,%i0 2104 bg,pt %icc,.loop0 2105! delay slot 2106 st %f7,[%o0+4] 2107 2108 ba,pt %icc,.end 2109! delay slot 2110 nop 2111 2112 .align 16 2113.case9: 2114 fmuld %f0,pp3,%f6 ! sin(x0) 2115 2116 fmuld %f24,pp3,%f30 ! sin(x3) 2117 2118 faddd %f6,pp2,%f6 2119 fmuld %f0,qq2,%f4 2120 2121 fmuld %f8,qq3,%f14 ! cos(x1) 2122 2123 fmuld %f16,qq3,%f22 ! cos(x2) 2124 2125 faddd %f30,pp2,%f30 2126 fmuld %f24,qq2,%f28 2127 2128 fmuld %f0,%f6,%f6 2129 faddd %f4,qq1,%f4 2130 2131 faddd %f14,qq2,%f14 2132 fmuld %f8,pp2,%f12 2133 2134 faddd %f22,qq2,%f22 2135 fmuld %f16,pp2,%f20 2136 2137 fmuld %f24,%f30,%f30 2138 faddd %f28,qq1,%f28 2139 2140 faddd %f6,pp1,%f6 2141 fmuld %f0,%f4,%f4 2142 add %l4,%g1,%l4 2143 2144 fmuld %f8,%f14,%f14 2145 faddd %f12,pp1,%f12 2146 2147 fmuld %f16,%f22,%f22 2148 faddd %f20,pp1,%f20 2149 2150 faddd %f30,pp1,%f30 2151 fmuld %f24,%f28,%f28 2152 add %l7,%g1,%l7 2153 2154 fmuld %f0,%f6,%f6 2155 2156 faddd %f14,qq1,%f14 2157 fmuld %f8,%f12,%f12 2158 add %l5,%g1,%l5 2159 2160 faddd %f22,qq1,%f22 2161 fmuld %f16,%f20,%f20 2162 add %l6,%g1,%l6 2163 2164 fmuld %f24,%f30,%f30 2165 2166 fmuld %f2,%f6,%f6 2167 ldd [%l4+8],%f0 2168 2169 fmuld %f10,%f12,%f12 2170 2171 fmuld %f18,%f20,%f20 2172 2173 fmuld %f26,%f30,%f30 2174 ldd [%l7+8],%f24 2175 2176 fmuld %f0,%f4,%f4 2177 faddd %f32,%f6,%f6 2178 2179 fmuld %f8,%f14,%f14 2180 faddd %f12,%f34,%f12 2181 ldd [%l5+16],%f8 2182 2183 fmuld %f16,%f22,%f22 2184 faddd %f20,%f36,%f20 2185 ldd [%l6+16],%f16 2186 2187 fmuld 
%f24,%f28,%f28 2188 faddd %f38,%f30,%f30 2189 2190 faddd %f2,%f6,%f6 2191 ldd [%l4+16],%f32 2192 2193 fmuld %f8,%f14,%f14 2194 faddd %f12,%f10,%f12 2195 ldd [%l5+8],%f34 2196 2197 fmuld %f16,%f22,%f22 2198 faddd %f20,%f18,%f20 2199 ldd [%l6+8],%f36 2200 2201 faddd %f26,%f30,%f30 2202 ldd [%l7+16],%f38 2203 2204 fmuld %f32,%f6,%f6 2205 2206 fmuld %f34,%f12,%f12 2207 2208 fmuld %f36,%f20,%f20 2209 2210 fmuld %f38,%f30,%f30 2211 2212 faddd %f6,%f4,%f6 2213 2214 fsubd %f14,%f12,%f14 2215 2216 fsubd %f22,%f20,%f22 2217 2218 faddd %f30,%f28,%f30 2219 2220 faddd %f6,%f0,%f6 2221 2222 faddd %f14,%f8,%f14 2223 2224 faddd %f22,%f16,%f22 2225 2226 faddd %f30,%f24,%f30 2227 mov %l0,%l4 2228 2229 fnegd %f6,%f4 2230 lda [%i1]%asi,%l0 ! preload next argument 2231 2232 fnegd %f14,%f12 2233 lda [%i1]%asi,%f0 2234 2235 fnegd %f22,%f20 2236 lda [%i1+4]%asi,%f3 2237 2238 fnegd %f30,%f28 2239 andn %l0,%i5,%l0 2240 add %i1,%i2,%i1 2241 2242 andcc %l4,2,%g0 2243 fmovdnz %icc,%f4,%f6 2244 st %f6,[%o0] 2245 2246 andcc %l1,2,%g0 2247 fmovdnz %icc,%f12,%f14 2248 st %f14,[%o1] 2249 2250 andcc %l2,2,%g0 2251 fmovdnz %icc,%f20,%f22 2252 st %f22,[%o2] 2253 2254 andcc %l3,2,%g0 2255 fmovdnz %icc,%f28,%f30 2256 st %f30,[%o3] 2257 2258 addcc %i0,-1,%i0 2259 bg,pt %icc,.loop0 2260! delay slot 2261 st %f7,[%o0+4] 2262 2263 ba,pt %icc,.end 2264! delay slot 2265 nop 2266 2267 .align 16 2268.case10: 2269 fmuld %f26,%f26,%f24 2270 andcc %l3,1,%g0 2271 bz,pn %icc,.case11 2272! delay slot 2273 fxor %f30,%f38,%f38 2274 2275 fmuld %f0,pp3,%f6 ! sin(x0) 2276 2277 fmuld %f16,pp3,%f22 ! sin(x2) 2278 2279 faddd %f6,pp2,%f6 2280 fmuld %f0,qq2,%f4 2281 2282 fmuld %f8,qq3,%f14 ! cos(x1) 2283 2284 faddd %f22,pp2,%f22 2285 fmuld %f16,qq2,%f20 2286 2287 fmuld %f24,qq3,%f30 ! 
cos(x3) 2288 2289 fmuld %f0,%f6,%f6 2290 faddd %f4,qq1,%f4 2291 2292 faddd %f14,qq2,%f14 2293 fmuld %f8,pp2,%f12 2294 2295 fmuld %f16,%f22,%f22 2296 faddd %f20,qq1,%f20 2297 2298 faddd %f30,qq2,%f30 2299 fmuld %f24,pp2,%f28 2300 2301 faddd %f6,pp1,%f6 2302 fmuld %f0,%f4,%f4 2303 add %l4,%g1,%l4 2304 2305 fmuld %f8,%f14,%f14 2306 faddd %f12,pp1,%f12 2307 2308 faddd %f22,pp1,%f22 2309 fmuld %f16,%f20,%f20 2310 add %l6,%g1,%l6 2311 2312 fmuld %f24,%f30,%f30 2313 faddd %f28,pp1,%f28 2314 2315 fmuld %f0,%f6,%f6 2316 2317 faddd %f14,qq1,%f14 2318 fmuld %f8,%f12,%f12 2319 add %l5,%g1,%l5 2320 2321 fmuld %f16,%f22,%f22 2322 2323 faddd %f30,qq1,%f30 2324 fmuld %f24,%f28,%f28 2325 add %l7,%g1,%l7 2326 2327 fmuld %f2,%f6,%f6 2328 ldd [%l4+8],%f0 2329 2330 fmuld %f10,%f12,%f12 2331 2332 fmuld %f18,%f22,%f22 2333 ldd [%l6+8],%f16 2334 2335 fmuld %f26,%f28,%f28 2336 2337 fmuld %f0,%f4,%f4 2338 faddd %f32,%f6,%f6 2339 2340 fmuld %f8,%f14,%f14 2341 faddd %f12,%f34,%f12 2342 ldd [%l5+16],%f8 2343 2344 fmuld %f16,%f20,%f20 2345 faddd %f36,%f22,%f22 2346 2347 fmuld %f24,%f30,%f30 2348 faddd %f28,%f38,%f28 2349 ldd [%l7+16],%f24 2350 2351 faddd %f2,%f6,%f6 2352 ldd [%l4+16],%f32 2353 2354 fmuld %f8,%f14,%f14 2355 faddd %f12,%f10,%f12 2356 ldd [%l5+8],%f34 2357 2358 faddd %f18,%f22,%f22 2359 ldd [%l6+16],%f36 2360 2361 fmuld %f24,%f30,%f30 2362 faddd %f28,%f26,%f28 2363 ldd [%l7+8],%f38 2364 2365 fmuld %f32,%f6,%f6 2366 2367 fmuld %f34,%f12,%f12 2368 2369 fmuld %f36,%f22,%f22 2370 2371 fmuld %f38,%f28,%f28 2372 2373 faddd %f6,%f4,%f6 2374 2375 fsubd %f14,%f12,%f14 2376 2377 faddd %f22,%f20,%f22 2378 2379 fsubd %f30,%f28,%f30 2380 2381 faddd %f6,%f0,%f6 2382 2383 faddd %f14,%f8,%f14 2384 2385 faddd %f22,%f16,%f22 2386 2387 faddd %f30,%f24,%f30 2388 mov %l0,%l4 2389 2390 fnegd %f6,%f4 2391 lda [%i1]%asi,%l0 ! 
preload next argument 2392 2393 fnegd %f14,%f12 2394 lda [%i1]%asi,%f0 2395 2396 fnegd %f22,%f20 2397 lda [%i1+4]%asi,%f3 2398 2399 fnegd %f30,%f28 2400 andn %l0,%i5,%l0 2401 add %i1,%i2,%i1 2402 2403 andcc %l4,2,%g0 2404 fmovdnz %icc,%f4,%f6 2405 st %f6,[%o0] 2406 2407 andcc %l1,2,%g0 2408 fmovdnz %icc,%f12,%f14 2409 st %f14,[%o1] 2410 2411 andcc %l2,2,%g0 2412 fmovdnz %icc,%f20,%f22 2413 st %f22,[%o2] 2414 2415 andcc %l3,2,%g0 2416 fmovdnz %icc,%f28,%f30 2417 st %f30,[%o3] 2418 2419 addcc %i0,-1,%i0 2420 bg,pt %icc,.loop0 2421! delay slot 2422 st %f7,[%o0+4] 2423 2424 ba,pt %icc,.end 2425! delay slot 2426 nop 2427 2428 .align 16 2429.case11: 2430 fmuld %f0,pp3,%f6 ! sin(x0) 2431 2432 fmuld %f16,pp3,%f22 ! sin(x2) 2433 2434 fmuld %f24,pp3,%f30 ! sin(x3) 2435 2436 faddd %f6,pp2,%f6 2437 fmuld %f0,qq2,%f4 2438 2439 fmuld %f8,qq3,%f14 ! cos(x1) 2440 2441 faddd %f22,pp2,%f22 2442 fmuld %f16,qq2,%f20 2443 2444 faddd %f30,pp2,%f30 2445 fmuld %f24,qq2,%f28 2446 2447 fmuld %f0,%f6,%f6 2448 faddd %f4,qq1,%f4 2449 2450 faddd %f14,qq2,%f14 2451 fmuld %f8,pp2,%f12 2452 2453 fmuld %f16,%f22,%f22 2454 faddd %f20,qq1,%f20 2455 2456 fmuld %f24,%f30,%f30 2457 faddd %f28,qq1,%f28 2458 2459 faddd %f6,pp1,%f6 2460 fmuld %f0,%f4,%f4 2461 add %l4,%g1,%l4 2462 2463 fmuld %f8,%f14,%f14 2464 faddd %f12,pp1,%f12 2465 2466 faddd %f22,pp1,%f22 2467 fmuld %f16,%f20,%f20 2468 add %l6,%g1,%l6 2469 2470 faddd %f30,pp1,%f30 2471 fmuld %f24,%f28,%f28 2472 add %l7,%g1,%l7 2473 2474 fmuld %f0,%f6,%f6 2475 2476 faddd %f14,qq1,%f14 2477 fmuld %f8,%f12,%f12 2478 add %l5,%g1,%l5 2479 2480 fmuld %f16,%f22,%f22 2481 2482 fmuld %f24,%f30,%f30 2483 2484 fmuld %f2,%f6,%f6 2485 ldd [%l4+8],%f0 2486 2487 fmuld %f10,%f12,%f12 2488 2489 fmuld %f18,%f22,%f22 2490 ldd [%l6+8],%f16 2491 2492 fmuld %f26,%f30,%f30 2493 ldd [%l7+8],%f24 2494 2495 fmuld %f0,%f4,%f4 2496 faddd %f32,%f6,%f6 2497 2498 fmuld %f8,%f14,%f14 2499 faddd %f12,%f34,%f12 2500 ldd [%l5+16],%f8 2501 2502 fmuld %f16,%f20,%f20 2503 faddd 
%f36,%f22,%f22 2504 2505 fmuld %f24,%f28,%f28 2506 faddd %f38,%f30,%f30 2507 2508 faddd %f2,%f6,%f6 2509 ldd [%l4+16],%f32 2510 2511 fmuld %f8,%f14,%f14 2512 faddd %f12,%f10,%f12 2513 ldd [%l5+8],%f34 2514 2515 faddd %f18,%f22,%f22 2516 ldd [%l6+16],%f36 2517 2518 faddd %f26,%f30,%f30 2519 ldd [%l7+16],%f38 2520 2521 fmuld %f32,%f6,%f6 2522 2523 fmuld %f34,%f12,%f12 2524 2525 fmuld %f36,%f22,%f22 2526 2527 fmuld %f38,%f30,%f30 2528 2529 faddd %f6,%f4,%f6 2530 2531 fsubd %f14,%f12,%f14 2532 2533 faddd %f22,%f20,%f22 2534 2535 faddd %f30,%f28,%f30 2536 2537 faddd %f6,%f0,%f6 2538 2539 faddd %f14,%f8,%f14 2540 2541 faddd %f22,%f16,%f22 2542 2543 faddd %f30,%f24,%f30 2544 mov %l0,%l4 2545 2546 fnegd %f6,%f4 2547 lda [%i1]%asi,%l0 ! preload next argument 2548 2549 fnegd %f14,%f12 2550 lda [%i1]%asi,%f0 2551 2552 fnegd %f22,%f20 2553 lda [%i1+4]%asi,%f3 2554 2555 fnegd %f30,%f28 2556 andn %l0,%i5,%l0 2557 add %i1,%i2,%i1 2558 2559 andcc %l4,2,%g0 2560 fmovdnz %icc,%f4,%f6 2561 st %f6,[%o0] 2562 2563 andcc %l1,2,%g0 2564 fmovdnz %icc,%f12,%f14 2565 st %f14,[%o1] 2566 2567 andcc %l2,2,%g0 2568 fmovdnz %icc,%f20,%f22 2569 st %f22,[%o2] 2570 2571 andcc %l3,2,%g0 2572 fmovdnz %icc,%f28,%f30 2573 st %f30,[%o3] 2574 2575 addcc %i0,-1,%i0 2576 bg,pt %icc,.loop0 2577! delay slot 2578 st %f7,[%o0+4] 2579 2580 ba,pt %icc,.end 2581! delay slot 2582 nop 2583 2584 .align 16 2585.case12: 2586 fmuld %f18,%f18,%f16 2587 andcc %l2,1,%g0 2588 bz,pn %icc,.case14 2589! delay slot 2590 fxor %f22,%f36,%f36 2591 2592 fmuld %f26,%f26,%f24 2593 andcc %l3,1,%g0 2594 bz,pn %icc,.case13 2595! delay slot 2596 fxor %f30,%f38,%f38 2597 2598 fmuld %f0,pp3,%f6 ! sin(x0) 2599 2600 fmuld %f8,pp3,%f14 ! sin(x1) 2601 2602 faddd %f6,pp2,%f6 2603 fmuld %f0,qq2,%f4 2604 2605 faddd %f14,pp2,%f14 2606 fmuld %f8,qq2,%f12 2607 2608 fmuld %f16,qq3,%f22 ! cos(x2) 2609 2610 fmuld %f24,qq3,%f30 ! 
cos(x3) 2611 2612 fmuld %f0,%f6,%f6 2613 faddd %f4,qq1,%f4 2614 2615 fmuld %f8,%f14,%f14 2616 faddd %f12,qq1,%f12 2617 2618 faddd %f22,qq2,%f22 2619 fmuld %f16,pp2,%f20 2620 2621 faddd %f30,qq2,%f30 2622 fmuld %f24,pp2,%f28 2623 2624 faddd %f6,pp1,%f6 2625 fmuld %f0,%f4,%f4 2626 add %l4,%g1,%l4 2627 2628 faddd %f14,pp1,%f14 2629 fmuld %f8,%f12,%f12 2630 add %l5,%g1,%l5 2631 2632 fmuld %f16,%f22,%f22 2633 faddd %f20,pp1,%f20 2634 2635 fmuld %f24,%f30,%f30 2636 faddd %f28,pp1,%f28 2637 2638 fmuld %f0,%f6,%f6 2639 2640 fmuld %f8,%f14,%f14 2641 2642 faddd %f22,qq1,%f22 2643 fmuld %f16,%f20,%f20 2644 add %l6,%g1,%l6 2645 2646 faddd %f30,qq1,%f30 2647 fmuld %f24,%f28,%f28 2648 add %l7,%g1,%l7 2649 2650 fmuld %f2,%f6,%f6 2651 ldd [%l4+8],%f0 2652 2653 fmuld %f10,%f14,%f14 2654 ldd [%l5+8],%f8 2655 2656 fmuld %f18,%f20,%f20 2657 2658 fmuld %f26,%f28,%f28 2659 2660 fmuld %f0,%f4,%f4 2661 faddd %f32,%f6,%f6 2662 2663 fmuld %f8,%f12,%f12 2664 faddd %f34,%f14,%f14 2665 2666 fmuld %f16,%f22,%f22 2667 faddd %f20,%f36,%f20 2668 ldd [%l6+16],%f16 2669 2670 fmuld %f24,%f30,%f30 2671 faddd %f28,%f38,%f28 2672 ldd [%l7+16],%f24 2673 2674 faddd %f2,%f6,%f6 2675 ldd [%l4+16],%f32 2676 2677 faddd %f10,%f14,%f14 2678 ldd [%l5+16],%f34 2679 2680 fmuld %f16,%f22,%f22 2681 faddd %f20,%f18,%f20 2682 ldd [%l6+8],%f36 2683 2684 fmuld %f24,%f30,%f30 2685 faddd %f28,%f26,%f28 2686 ldd [%l7+8],%f38 2687 2688 fmuld %f32,%f6,%f6 2689 2690 fmuld %f34,%f14,%f14 2691 2692 fmuld %f36,%f20,%f20 2693 2694 fmuld %f38,%f28,%f28 2695 2696 faddd %f6,%f4,%f6 2697 2698 faddd %f14,%f12,%f14 2699 2700 fsubd %f22,%f20,%f22 2701 2702 fsubd %f30,%f28,%f30 2703 2704 faddd %f6,%f0,%f6 2705 2706 faddd %f14,%f8,%f14 2707 2708 faddd %f22,%f16,%f22 2709 2710 faddd %f30,%f24,%f30 2711 mov %l0,%l4 2712 2713 fnegd %f6,%f4 2714 lda [%i1]%asi,%l0 ! 
preload next argument 2715 2716 fnegd %f14,%f12 2717 lda [%i1]%asi,%f0 2718 2719 fnegd %f22,%f20 2720 lda [%i1+4]%asi,%f3 2721 2722 fnegd %f30,%f28 2723 andn %l0,%i5,%l0 2724 add %i1,%i2,%i1 2725 2726 andcc %l4,2,%g0 2727 fmovdnz %icc,%f4,%f6 2728 st %f6,[%o0] 2729 2730 andcc %l1,2,%g0 2731 fmovdnz %icc,%f12,%f14 2732 st %f14,[%o1] 2733 2734 andcc %l2,2,%g0 2735 fmovdnz %icc,%f20,%f22 2736 st %f22,[%o2] 2737 2738 andcc %l3,2,%g0 2739 fmovdnz %icc,%f28,%f30 2740 st %f30,[%o3] 2741 2742 addcc %i0,-1,%i0 2743 bg,pt %icc,.loop0 2744! delay slot 2745 st %f7,[%o0+4] 2746 2747 ba,pt %icc,.end 2748! delay slot 2749 nop 2750 2751 .align 16 2752.case13: 2753 fmuld %f0,pp3,%f6 ! sin(x0) 2754 2755 fmuld %f8,pp3,%f14 ! sin(x1) 2756 2757 fmuld %f24,pp3,%f30 ! sin(x3) 2758 2759 faddd %f6,pp2,%f6 2760 fmuld %f0,qq2,%f4 2761 2762 faddd %f14,pp2,%f14 2763 fmuld %f8,qq2,%f12 2764 2765 fmuld %f16,qq3,%f22 ! cos(x2) 2766 2767 faddd %f30,pp2,%f30 2768 fmuld %f24,qq2,%f28 2769 2770 fmuld %f0,%f6,%f6 2771 faddd %f4,qq1,%f4 2772 2773 fmuld %f8,%f14,%f14 2774 faddd %f12,qq1,%f12 2775 2776 faddd %f22,qq2,%f22 2777 fmuld %f16,pp2,%f20 2778 2779 fmuld %f24,%f30,%f30 2780 faddd %f28,qq1,%f28 2781 2782 faddd %f6,pp1,%f6 2783 fmuld %f0,%f4,%f4 2784 add %l4,%g1,%l4 2785 2786 faddd %f14,pp1,%f14 2787 fmuld %f8,%f12,%f12 2788 add %l5,%g1,%l5 2789 2790 fmuld %f16,%f22,%f22 2791 faddd %f20,pp1,%f20 2792 2793 faddd %f30,pp1,%f30 2794 fmuld %f24,%f28,%f28 2795 add %l7,%g1,%l7 2796 2797 fmuld %f0,%f6,%f6 2798 2799 fmuld %f8,%f14,%f14 2800 2801 faddd %f22,qq1,%f22 2802 fmuld %f16,%f20,%f20 2803 add %l6,%g1,%l6 2804 2805 fmuld %f24,%f30,%f30 2806 2807 fmuld %f2,%f6,%f6 2808 ldd [%l4+8],%f0 2809 2810 fmuld %f10,%f14,%f14 2811 ldd [%l5+8],%f8 2812 2813 fmuld %f18,%f20,%f20 2814 2815 fmuld %f26,%f30,%f30 2816 ldd [%l7+8],%f24 2817 2818 fmuld %f0,%f4,%f4 2819 faddd %f32,%f6,%f6 2820 2821 fmuld %f8,%f12,%f12 2822 faddd %f34,%f14,%f14 2823 2824 fmuld %f16,%f22,%f22 2825 faddd %f20,%f36,%f20 2826 ldd 
[%l6+16],%f16 2827 2828 fmuld %f24,%f28,%f28 2829 faddd %f38,%f30,%f30 2830 2831 faddd %f2,%f6,%f6 2832 ldd [%l4+16],%f32 2833 2834 faddd %f10,%f14,%f14 2835 ldd [%l5+16],%f34 2836 2837 fmuld %f16,%f22,%f22 2838 faddd %f20,%f18,%f20 2839 ldd [%l6+8],%f36 2840 2841 faddd %f26,%f30,%f30 2842 ldd [%l7+16],%f38 2843 2844 fmuld %f32,%f6,%f6 2845 2846 fmuld %f34,%f14,%f14 2847 2848 fmuld %f36,%f20,%f20 2849 2850 fmuld %f38,%f30,%f30 2851 2852 faddd %f6,%f4,%f6 2853 2854 faddd %f14,%f12,%f14 2855 2856 fsubd %f22,%f20,%f22 2857 2858 faddd %f30,%f28,%f30 2859 2860 faddd %f6,%f0,%f6 2861 2862 faddd %f14,%f8,%f14 2863 2864 faddd %f22,%f16,%f22 2865 2866 faddd %f30,%f24,%f30 2867 mov %l0,%l4 2868 2869 fnegd %f6,%f4 2870 lda [%i1]%asi,%l0 ! preload next argument 2871 2872 fnegd %f14,%f12 2873 lda [%i1]%asi,%f0 2874 2875 fnegd %f22,%f20 2876 lda [%i1+4]%asi,%f3 2877 2878 fnegd %f30,%f28 2879 andn %l0,%i5,%l0 2880 add %i1,%i2,%i1 2881 2882 andcc %l4,2,%g0 2883 fmovdnz %icc,%f4,%f6 2884 st %f6,[%o0] 2885 2886 andcc %l1,2,%g0 2887 fmovdnz %icc,%f12,%f14 2888 st %f14,[%o1] 2889 2890 andcc %l2,2,%g0 2891 fmovdnz %icc,%f20,%f22 2892 st %f22,[%o2] 2893 2894 andcc %l3,2,%g0 2895 fmovdnz %icc,%f28,%f30 2896 st %f30,[%o3] 2897 2898 addcc %i0,-1,%i0 2899 bg,pt %icc,.loop0 2900! delay slot 2901 st %f7,[%o0+4] 2902 2903 ba,pt %icc,.end 2904! delay slot 2905 nop 2906 2907 .align 16 2908.case14: 2909 fmuld %f26,%f26,%f24 2910 andcc %l3,1,%g0 2911 bz,pn %icc,.case15 2912! delay slot 2913 fxor %f30,%f38,%f38 2914 2915 fmuld %f0,pp3,%f6 ! sin(x0) 2916 2917 fmuld %f8,pp3,%f14 ! sin(x1) 2918 2919 fmuld %f16,pp3,%f22 ! sin(x2) 2920 2921 faddd %f6,pp2,%f6 2922 fmuld %f0,qq2,%f4 2923 2924 faddd %f14,pp2,%f14 2925 fmuld %f8,qq2,%f12 2926 2927 faddd %f22,pp2,%f22 2928 fmuld %f16,qq2,%f20 2929 2930 fmuld %f24,qq3,%f30 ! 
cos(x3) 2931 2932 fmuld %f0,%f6,%f6 2933 faddd %f4,qq1,%f4 2934 2935 fmuld %f8,%f14,%f14 2936 faddd %f12,qq1,%f12 2937 2938 fmuld %f16,%f22,%f22 2939 faddd %f20,qq1,%f20 2940 2941 faddd %f30,qq2,%f30 2942 fmuld %f24,pp2,%f28 2943 2944 faddd %f6,pp1,%f6 2945 fmuld %f0,%f4,%f4 2946 add %l4,%g1,%l4 2947 2948 faddd %f14,pp1,%f14 2949 fmuld %f8,%f12,%f12 2950 add %l5,%g1,%l5 2951 2952 faddd %f22,pp1,%f22 2953 fmuld %f16,%f20,%f20 2954 add %l6,%g1,%l6 2955 2956 fmuld %f24,%f30,%f30 2957 faddd %f28,pp1,%f28 2958 2959 fmuld %f0,%f6,%f6 2960 2961 fmuld %f8,%f14,%f14 2962 2963 fmuld %f16,%f22,%f22 2964 2965 faddd %f30,qq1,%f30 2966 fmuld %f24,%f28,%f28 2967 add %l7,%g1,%l7 2968 2969 fmuld %f2,%f6,%f6 2970 ldd [%l4+8],%f0 2971 2972 fmuld %f10,%f14,%f14 2973 ldd [%l5+8],%f8 2974 2975 fmuld %f18,%f22,%f22 2976 ldd [%l6+8],%f16 2977 2978 fmuld %f26,%f28,%f28 2979 2980 fmuld %f0,%f4,%f4 2981 faddd %f32,%f6,%f6 2982 2983 fmuld %f8,%f12,%f12 2984 faddd %f34,%f14,%f14 2985 2986 fmuld %f16,%f20,%f20 2987 faddd %f36,%f22,%f22 2988 2989 fmuld %f24,%f30,%f30 2990 faddd %f28,%f38,%f28 2991 ldd [%l7+16],%f24 2992 2993 faddd %f2,%f6,%f6 2994 ldd [%l4+16],%f32 2995 2996 faddd %f10,%f14,%f14 2997 ldd [%l5+16],%f34 2998 2999 faddd %f18,%f22,%f22 3000 ldd [%l6+16],%f36 3001 3002 fmuld %f24,%f30,%f30 3003 faddd %f28,%f26,%f28 3004 ldd [%l7+8],%f38 3005 3006 fmuld %f32,%f6,%f6 3007 3008 fmuld %f34,%f14,%f14 3009 3010 fmuld %f36,%f22,%f22 3011 3012 fmuld %f38,%f28,%f28 3013 3014 faddd %f6,%f4,%f6 3015 3016 faddd %f14,%f12,%f14 3017 3018 faddd %f22,%f20,%f22 3019 3020 fsubd %f30,%f28,%f30 3021 3022 faddd %f6,%f0,%f6 3023 3024 faddd %f14,%f8,%f14 3025 3026 faddd %f22,%f16,%f22 3027 3028 faddd %f30,%f24,%f30 3029 mov %l0,%l4 3030 3031 fnegd %f6,%f4 3032 lda [%i1]%asi,%l0 ! 
preload next argument 3033 3034 fnegd %f14,%f12 3035 lda [%i1]%asi,%f0 3036 3037 fnegd %f22,%f20 3038 lda [%i1+4]%asi,%f3 3039 3040 fnegd %f30,%f28 3041 andn %l0,%i5,%l0 3042 add %i1,%i2,%i1 3043 3044 andcc %l4,2,%g0 3045 fmovdnz %icc,%f4,%f6 3046 st %f6,[%o0] 3047 3048 andcc %l1,2,%g0 3049 fmovdnz %icc,%f12,%f14 3050 st %f14,[%o1] 3051 3052 andcc %l2,2,%g0 3053 fmovdnz %icc,%f20,%f22 3054 st %f22,[%o2] 3055 3056 andcc %l3,2,%g0 3057 fmovdnz %icc,%f28,%f30 3058 st %f30,[%o3] 3059 3060 addcc %i0,-1,%i0 3061 bg,pt %icc,.loop0 3062! delay slot 3063 st %f7,[%o0+4] 3064 3065 ba,pt %icc,.end 3066! delay slot 3067 nop 3068 3069 .align 16 3070.case15: 3071 fmuld %f0,pp3,%f6 ! sin(x0) 3072 3073 fmuld %f8,pp3,%f14 ! sin(x1) 3074 3075 fmuld %f16,pp3,%f22 ! sin(x2) 3076 3077 fmuld %f24,pp3,%f30 ! sin(x3) 3078 3079 faddd %f6,pp2,%f6 3080 fmuld %f0,qq2,%f4 3081 3082 faddd %f14,pp2,%f14 3083 fmuld %f8,qq2,%f12 3084 3085 faddd %f22,pp2,%f22 3086 fmuld %f16,qq2,%f20 3087 3088 faddd %f30,pp2,%f30 3089 fmuld %f24,qq2,%f28 3090 3091 fmuld %f0,%f6,%f6 3092 faddd %f4,qq1,%f4 3093 3094 fmuld %f8,%f14,%f14 3095 faddd %f12,qq1,%f12 3096 3097 fmuld %f16,%f22,%f22 3098 faddd %f20,qq1,%f20 3099 3100 fmuld %f24,%f30,%f30 3101 faddd %f28,qq1,%f28 3102 3103 faddd %f6,pp1,%f6 3104 fmuld %f0,%f4,%f4 3105 add %l4,%g1,%l4 3106 3107 faddd %f14,pp1,%f14 3108 fmuld %f8,%f12,%f12 3109 add %l5,%g1,%l5 3110 3111 faddd %f22,pp1,%f22 3112 fmuld %f16,%f20,%f20 3113 add %l6,%g1,%l6 3114 3115 faddd %f30,pp1,%f30 3116 fmuld %f24,%f28,%f28 3117 add %l7,%g1,%l7 3118 3119 fmuld %f0,%f6,%f6 3120 3121 fmuld %f8,%f14,%f14 3122 3123 fmuld %f16,%f22,%f22 3124 3125 fmuld %f24,%f30,%f30 3126 3127 fmuld %f2,%f6,%f6 3128 ldd [%l4+8],%f0 3129 3130 fmuld %f10,%f14,%f14 3131 ldd [%l5+8],%f8 3132 3133 fmuld %f18,%f22,%f22 3134 ldd [%l6+8],%f16 3135 3136 fmuld %f26,%f30,%f30 3137 ldd [%l7+8],%f24 3138 3139 fmuld %f0,%f4,%f4 3140 faddd %f32,%f6,%f6 3141 3142 fmuld %f8,%f12,%f12 3143 faddd %f34,%f14,%f14 3144 3145 fmuld 
%f16,%f20,%f20 3146 faddd %f36,%f22,%f22 3147 3148 fmuld %f24,%f28,%f28 3149 faddd %f38,%f30,%f30 3150 3151 faddd %f2,%f6,%f6 3152 ldd [%l4+16],%f32 3153 3154 faddd %f10,%f14,%f14 3155 ldd [%l5+16],%f34 3156 3157 faddd %f18,%f22,%f22 3158 ldd [%l6+16],%f36 3159 3160 faddd %f26,%f30,%f30 3161 ldd [%l7+16],%f38 3162 3163 fmuld %f32,%f6,%f6 3164 3165 fmuld %f34,%f14,%f14 3166 3167 fmuld %f36,%f22,%f22 3168 3169 fmuld %f38,%f30,%f30 3170 3171 faddd %f6,%f4,%f6 3172 3173 faddd %f14,%f12,%f14 3174 3175 faddd %f22,%f20,%f22 3176 3177 faddd %f30,%f28,%f30 3178 3179 faddd %f6,%f0,%f6 3180 3181 faddd %f14,%f8,%f14 3182 3183 faddd %f22,%f16,%f22 3184 3185 faddd %f30,%f24,%f30 3186 mov %l0,%l4 3187 3188 fnegd %f6,%f4 3189 lda [%i1]%asi,%l0 ! preload next argument 3190 3191 fnegd %f14,%f12 3192 lda [%i1]%asi,%f0 3193 3194 fnegd %f22,%f20 3195 lda [%i1+4]%asi,%f3 3196 3197 fnegd %f30,%f28 3198 andn %l0,%i5,%l0 3199 add %i1,%i2,%i1 3200 3201 andcc %l4,2,%g0 3202 fmovdnz %icc,%f4,%f6 3203 st %f6,[%o0] 3204 3205 andcc %l1,2,%g0 3206 fmovdnz %icc,%f12,%f14 3207 st %f14,[%o1] 3208 3209 andcc %l2,2,%g0 3210 fmovdnz %icc,%f20,%f22 3211 st %f22,[%o2] 3212 3213 andcc %l3,2,%g0 3214 fmovdnz %icc,%f28,%f30 3215 st %f30,[%o3] 3216 3217 addcc %i0,-1,%i0 3218 bg,pt %icc,.loop0 3219! delay slot 3220 st %f7,[%o0+4] 3221 3222 ba,pt %icc,.end 3223! delay slot 3224 nop 3225 3226 3227 .align 16 3228.end: 3229 st %f15,[%o1+4] 3230 st %f23,[%o2+4] 3231 st %f31,[%o3+4] 3232 ld [%fp+biguns],%i5 3233 tst %i5 ! check for huge arguments remaining 3234 be,pt %icc,.exit 3235! delay slot 3236 nop 3237#ifdef __sparcv9 3238 ldx [%fp+xsave],%o1 3239 ldx [%fp+ysave],%o3 3240#else 3241 ld [%fp+xsave],%o1 3242 ld [%fp+ysave],%o3 3243#endif 3244 ld [%fp+nsave],%o0 3245 ld [%fp+sxsave],%o2 3246 ld [%fp+sysave],%o4 3247 sra %o2,0,%o2 ! sign-extend for V9 3248 sra %o4,0,%o4 3249 call __vlibm_vcos_big_ultra3 3250 sra %o5,0,%o5 ! 
delay slot 3251 3252.exit: 3253 ret 3254 restore 3255 3256 3257 .align 16 3258.last1: 3259 faddd %f2,c3two44,%f4 3260 st %f15,[%o1+4] 3261.last1_from_range1: 3262 mov 0,%l1 3263 fzeros %f8 3264 fzero %f10 3265 add %fp,junk,%o1 3266.last2: 3267 faddd %f10,c3two44,%f12 3268 st %f23,[%o2+4] 3269.last2_from_range2: 3270 mov 0,%l2 3271 fzeros %f16 3272 fzero %f18 3273 add %fp,junk,%o2 3274.last3: 3275 faddd %f18,c3two44,%f20 3276 st %f31,[%o3+4] 3277 st %f5,[%fp+nk0] 3278 st %f13,[%fp+nk1] 3279.last3_from_range3: 3280 mov 0,%l3 3281 fzeros %f24 3282 fzero %f26 3283 ba,pt %icc,.cont 3284! delay slot 3285 add %fp,junk,%o3 3286 3287 3288 .align 16 3289.range0: 3290 cmp %l0,%o4 3291 bl,pt %icc,1f ! hx < 0x3e400000 3292! delay slot, harmless if branch taken 3293 sethi %hi(0x7ff00000),%o7 3294 cmp %l0,%o7 3295 bl,a,pt %icc,2f ! branch if finite 3296! delay slot, squashed if branch not taken 3297 st %o4,[%fp+biguns] ! set biguns 3298 fzero %f0 3299 fmuld %f2,%f0,%f2 3300 st %f2,[%o0] 3301 ba,pt %icc,2f 3302! delay slot 3303 st %f3,[%o0+4] 33041: 3305 fdtoi %f2,%f4 ! raise inexact if not zero 3306 sethi %hi(0x3ff00000),%o7 3307 st %o7,[%o0] 3308 st %g0,[%o0+4] 33092: 3310 addcc %i0,-1,%i0 3311 ble,pn %icc,.end 3312! delay slot, harmless if branch taken 3313 add %i3,%i4,%i3 ! y += stridey 3314 andn %l1,%i5,%l0 ! hx &= ~0x80000000 3315 fmovs %f8,%f0 3316 fmovs %f11,%f3 3317 ba,pt %icc,.loop0 3318! delay slot 3319 add %i1,%i2,%i1 ! x += stridex 3320 3321 3322 .align 16 3323.range1: 3324 cmp %l1,%o4 3325 bl,pt %icc,1f ! hx < 0x3e400000 3326! delay slot, harmless if branch taken 3327 sethi %hi(0x7ff00000),%o7 3328 cmp %l1,%o7 3329 bl,a,pt %icc,2f ! branch if finite 3330! delay slot, squashed if branch not taken 3331 st %o4,[%fp+biguns] ! set biguns 3332 fzero %f8 3333 fmuld %f10,%f8,%f10 3334 st %f10,[%o1] 3335 ba,pt %icc,2f 3336! delay slot 3337 st %f11,[%o1+4] 33381: 3339 fdtoi %f10,%f12 ! 
raise inexact if not zero 3340 sethi %hi(0x3ff00000),%o7 3341 st %o7,[%o1] 3342 st %g0,[%o1+4] 33432: 3344 addcc %i0,-1,%i0 3345 ble,pn %icc,.last1_from_range1 3346! delay slot, harmless if branch taken 3347 add %i3,%i4,%i3 ! y += stridey 3348 andn %l2,%i5,%l1 ! hx &= ~0x80000000 3349 fmovs %f16,%f8 3350 fmovs %f19,%f11 3351 ba,pt %icc,.loop1 3352! delay slot 3353 add %i1,%i2,%i1 ! x += stridex 3354 3355 3356 .align 16 3357.range2: 3358 cmp %l2,%o4 3359 bl,pt %icc,1f ! hx < 0x3e400000 3360! delay slot, harmless if branch taken 3361 sethi %hi(0x7ff00000),%o7 3362 cmp %l2,%o7 3363 bl,a,pt %icc,2f ! branch if finite 3364! delay slot, squashed if branch not taken 3365 st %o4,[%fp+biguns] ! set biguns 3366 fzero %f16 3367 fmuld %f18,%f16,%f18 3368 st %f18,[%o2] 3369 ba,pt %icc,2f 3370! delay slot 3371 st %f19,[%o2+4] 33721: 3373 fdtoi %f18,%f20 ! raise inexact if not zero 3374 sethi %hi(0x3ff00000),%o7 3375 st %o7,[%o2] 3376 st %g0,[%o2+4] 33772: 3378 addcc %i0,-1,%i0 3379 ble,pn %icc,.last2_from_range2 3380! delay slot, harmless if branch taken 3381 add %i3,%i4,%i3 ! y += stridey 3382 andn %l3,%i5,%l2 ! hx &= ~0x80000000 3383 fmovs %f24,%f16 3384 fmovs %f27,%f19 3385 ba,pt %icc,.loop2 3386! delay slot 3387 add %i1,%i2,%i1 ! x += stridex 3388 3389 3390 .align 16 3391.range3: 3392 cmp %l3,%o4 3393 bl,pt %icc,1f ! hx < 0x3e400000 3394! delay slot, harmless if branch taken 3395 sethi %hi(0x7ff00000),%o7 3396 cmp %l3,%o7 3397 bl,a,pt %icc,2f ! branch if finite 3398! delay slot, squashed if branch not taken 3399 st %o4,[%fp+biguns] ! set biguns 3400 fzero %f24 3401 fmuld %f26,%f24,%f26 3402 st %f26,[%o3] 3403 ba,pt %icc,2f 3404! delay slot 3405 st %f27,[%o3+4] 34061: 3407 fdtoi %f26,%f28 ! raise inexact if not zero 3408 sethi %hi(0x3ff00000),%o7 3409 st %o7,[%o3] 3410 st %g0,[%o3+4] 34112: 3412 addcc %i0,-1,%i0 3413 ble,pn %icc,.last3_from_range3 3414! delay slot, harmless if branch taken 3415 add %i3,%i4,%i3 ! 
y += stridey 3416 ld [%i1],%l3 3417 ld [%i1],%f24 3418 ld [%i1+4],%f27 3419 andn %l3,%i5,%l3 ! hx &= ~0x80000000 3420 ba,pt %icc,.loop3 3421! delay slot 3422 add %i1,%i2,%i1 ! x += stridex 3423 3424 SET_SIZE(__vcos_ultra3) 3425 3426