/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vsincos.S"

#include "libm.h"

/*
 * __vsincos - vector sine and cosine.
 *
 * Incoming arguments, as consumed by the code below:
 *
 *	%i0	n		number of elements
 *	%i1	x		argument vector (read as pairs of 32-bit words)
 *	%i2	stridex		stride of x, in 8-byte elements
 *	%i3	s		sine result vector
 *	%i4	strides		stride of s, in 8-byte elements
 *	%i5	c		cosine result vector
 *	stack	stridec		stride of c, read from the caller frame
 *
 * NOTE(review): this looks like the C prototype
 *	void __vsincos(int n, double *x, int stridex,
 *	    double *s, int strides, double *c, int stridec);
 * confirm against the library headers.
 *
 * The main loop is unrolled three ways (.loop0/.loop1/.loop2); lane 0 uses
 * %f0-%f8, lane 1 uses %f10-%f18 and lane 2 uses %f20-%f28.  hx denotes the
 * high word of x with the sign bit cleared.  Each element is classified:
 *
 *   hx < 0x3e400000 or hx > 0x4099251e
 *	Handled out of line in .range0/.range1/.range2:
 *	- hx < 0x3e400000: s = x, c = 1.0 (fdtoi raises inexact if x != 0).
 *	- hx >= 0x7ff00000: x * 0 is stored to both s and c.
 *	- otherwise the element is skipped and the biguns flag is set; after
 *	  the loop __vlibm_vsincos_big is called with the original arguments.
 *
 *   all three hx <= 0x3fe921fb
 *	Primary path: |x| is split as __vlibm_TBL_sincos2[k] plus a small
 *	remainder; the table entry supplies a sine and cosine value which are
 *	combined with the pp (sine) and qq (cosine) polynomials in the
 *	remainder.
 *
 *   otherwise
 *	.medium: n = x * invpio2 rounded to an integer, x is reduced with
 *	pio2_1, pio2_2 and pio2_3, and the low bits of n select swapping and
 *	negating of the results.
 *
 * When fewer than three elements remain, .last1/.last2 fill the unused
 * lanes with zero and redirect their result pointers to the junk slot.
 *
 * The low result words of lanes 1 and 2 (%f17, %f27) are stored at the
 * start of the following pass, or in .end/.last1/.last2; the result pointers
 * are initialized to the junk slot so that the first such store is harmless.
 */

	RO_DATA
	.align	64
constants:
	.word	0x42c80000,0x00000000	! 3 * 2^44
	.word	0x43380000,0x00000000	! 3 * 2^51
	.word	0x3fe45f30,0x6dc9c883	! invpio2
	.word	0x3ff921fb,0x54442c00	! pio2_1
	.word	0x3d318469,0x898cc400	! pio2_2
	.word	0x3a71701b,0x839a2520	! pio2_3
	.word	0xbfc55555,0x55555533	! pp1
	.word	0x3f811111,0x10e7d53b	! pp2
	.word	0xbf2a0167,0xe6b3cf9b	! pp3
	.word	0xbfdfffff,0xffffff65	! qq1
	.word	0x3fa55555,0x54f88ed0	! qq2
	.word	0xbf56c12c,0xdd185f60	! qq3

! local storage indices

#define xsave		STACK_BIAS-0x8
#define ssave		STACK_BIAS-0x10
#define csave		STACK_BIAS-0x18
#define nsave		STACK_BIAS-0x1c
#define sxsave		STACK_BIAS-0x20
#define sssave		STACK_BIAS-0x24
#define biguns		STACK_BIAS-0x28
#define junk		STACK_BIAS-0x30
#define nk2		STACK_BIAS-0x38
#define nk1		STACK_BIAS-0x3c
#define nk0		STACK_BIAS-0x40
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x40

! register use

! i0  n
! i1  x
! i2  stridex
! i3  s
! i4  strides
! i5  0x80000000,n0

! l0  hx0,k0
! l1  hx1,k1
! l2  hx2,k2
! l3  c
! l4  pc0
! l5  pc1
! l6  pc2
! l7  stridec

! the following are 64-bit registers in both V8+ and V9

! g1  __vlibm_TBL_sincos2
! g5  scratch,n1

! o0  ps0
! o1  ps1
! o2  ps2
! o3  0x3fe921fb
! o4  0x3e400000
! o5  0x4099251e
! o7  scratch,n2

! f0  x0,z0
! f2  abs(x0)
! f4
! f6
! f8
! f10 x1,z1
! f12 abs(x1)
! f14
! f16
! f18
! f20 x2,z2
! f22 abs(x2)
! f24
! f26
! f28
! f30
! f32
! f34
! f36
! f38

#define c3two44	%f40
#define c3two51	%f42
#define invpio2	%f44
#define pio2_1	%f46
#define pio2_2	%f48
#define pio2_3	%f50
#define pp1	%f52
#define pp2	%f54
#define pp3	%f56
#define qq1	%f58
#define qq2	%f60
#define qq3	%f62

	ENTRY(__vsincos)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,constants,o0)
	PIC_SET(l7,__vlibm_TBL_sincos2,o1)
	mov	%o1,%g1
	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
#ifdef __sparcv9
	stx	%i1,[%fp+xsave]		! save arguments
	stx	%i3,[%fp+ssave]
	stx	%i5,[%fp+csave]
	ldx	[%fp+STACK_BIAS+0xb0],%l7
#else
	st	%i1,[%fp+xsave]		! save arguments
	st	%i3,[%fp+ssave]
	st	%i5,[%fp+csave]
	ld	[%fp+0x5c],%l7
#endif
	st	%i0,[%fp+nsave]
	st	%i2,[%fp+sxsave]
	st	%i4,[%fp+sssave]
	mov	%i5,%l3
	st	%g0,[%fp+biguns]	! biguns = 0
	ldd	[%o0+0x00],c3two44	! load/set up constants
	ldd	[%o0+0x08],c3two51
	ldd	[%o0+0x10],invpio2
	ldd	[%o0+0x18],pio2_1
	ldd	[%o0+0x20],pio2_2
	ldd	[%o0+0x28],pio2_3
	ldd	[%o0+0x30],pp1
	ldd	[%o0+0x38],pp2
	ldd	[%o0+0x40],pp3
	ldd	[%o0+0x48],qq1
	ldd	[%o0+0x50],qq2
	ldd	[%o0+0x58],qq3
	sethi	%hi(0x80000000),%i5
	sethi	%hi(0x3e400000),%o4
	sethi	%hi(0x3fe921fb),%o3
	or	%o3,%lo(0x3fe921fb),%o3
	sethi	%hi(0x4099251e),%o5
	or	%o5,%lo(0x4099251e),%o5
	sll	%i2,3,%i2		! scale strides
	sll	%i4,3,%i4
	sll	%l7,3,%l7
	add	%fp,junk,%o0		! loop prologue
	add	%fp,junk,%o1
	add	%fp,junk,%o2
	ld	[%i1],%l0		! *x
	ld	[%i1],%f0
	ld	[%i1+4],%f3
	andn	%l0,%i5,%l0		! mask off sign
	ba	.loop0
	add	%i1,%i2,%i1		! x += stridex

! 16-byte aligned
	.align	16
.loop0:
	lda	[%i1]%asi,%l1		! preload next argument
	sub	%l0,%o4,%g5
	sub	%o5,%l0,%o7
	fabss	%f0,%f2

	lda	[%i1]%asi,%f10
	orcc	%o7,%g5,%g0
	mov	%i3,%o0			! ps0 = s
	bl,pn	%icc,.range0		! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
	lda	[%i1+4]%asi,%f13
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! s += strides

	mov	%l3,%l4			! pc0 = c
	add	%l3,%l7,%l3		! c += stridec
	ble,pn	%icc,.last1

! delay slot
	andn	%l1,%i5,%l1
	add	%i1,%i2,%i1		! x += stridex
	faddd	%f2,c3two44,%f4
	st	%f17,[%o1+4]

.loop1:
	lda	[%i1]%asi,%l2		! preload next argument
	sub	%l1,%o4,%g5
	sub	%o5,%l1,%o7
	fabss	%f10,%f12

	lda	[%i1]%asi,%f20
	orcc	%o7,%g5,%g0
	mov	%i3,%o1			! ps1 = s
	bl,pn	%icc,.range1		! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
	lda	[%i1+4]%asi,%f23
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! s += strides

	mov	%l3,%l5			! pc1 = c
	add	%l3,%l7,%l3		! c += stridec
	ble,pn	%icc,.last2

! delay slot
	andn	%l2,%i5,%l2
	add	%i1,%i2,%i1		! x += stridex
	faddd	%f12,c3two44,%f14
	st	%f27,[%o2+4]

.loop2:
	sub	%l2,%o4,%g5
	sub	%o5,%l2,%o7
	fabss	%f20,%f22
	st	%f5,[%fp+nk0]

	orcc	%o7,%g5,%g0
	mov	%i3,%o2			! ps2 = s
	bl,pn	%icc,.range2		! hx < 0x3e400000 or hx > 0x4099251e
! delay slot
	st	%f15,[%fp+nk1]

	mov	%l3,%l6			! pc2 = c

.cont:
	add	%i3,%i4,%i3		! s += strides
	add	%l3,%l7,%l3		! c += stridec
	faddd	%f22,c3two44,%f24
	st	%f25,[%fp+nk2]

! each difference below is negative iff that hx > 0x3fe921fb
	sub	%o3,%l0,%l0
	sub	%o3,%l1,%l1
	fmovs	%f3,%f1

	sub	%o3,%l2,%l2
	fmovs	%f13,%f11

	or	%l0,%l1,%l0
	orcc	%l0,%l2,%g0
	fmovs	%f23,%f21

	fmuld	%f0,invpio2,%f6		! x * invpio2, for medium range

	fmuld	%f10,invpio2,%f16
	ld	[%fp+nk0],%l0

	fmuld	%f20,invpio2,%f26
	ld	[%fp+nk1],%l1

	bl,pn	%icc,.medium
! delay slot
	ld	[%fp+nk2],%l2

! primary path: all three hx <= 0x3fe921fb
	sll	%l0,5,%l0		! k
	fcmpd	%fcc0,%f0,pio2_3	! x < pio2_3 iff x < 0

	sll	%l1,5,%l1
	ldd	[%l0+%g1],%f4
	fcmpd	%fcc1,%f10,pio2_3

	sll	%l2,5,%l2
	ldd	[%l1+%g1],%f14
	fcmpd	%fcc2,%f20,pio2_3

	ldd	[%l2+%g1],%f24

	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]

	fsubd	%f12,%f14,%f12

	fsubd	%f22,%f24,%f22

	fmuld	%f2,%f2,%f0		! z = x * x

	fmuld	%f12,%f12,%f10

	fmuld	%f22,%f22,%f20

	fmuld	%f0,pp3,%f6

	fmuld	%f10,pp3,%f16

	fmuld	%f20,pp3,%f26

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq3,%f4

	faddd	%f16,pp2,%f16
	fmuld	%f10,qq3,%f14

	faddd	%f26,pp2,%f26
	fmuld	%f20,qq3,%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq2,%f4

	fmuld	%f10,%f16,%f16
	faddd	%f14,qq2,%f14

	fmuld	%f20,%f26,%f26
	faddd	%f24,qq2,%f24

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l0,%g1,%l0

	faddd	%f16,pp1,%f16
	fmuld	%f10,%f14,%f14
	add	%l1,%g1,%l1

	faddd	%f26,pp1,%f26
	fmuld	%f20,%f24,%f24
	add	%l2,%g1,%l2

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	fmuld	%f10,%f16,%f16
	faddd	%f14,qq1,%f14

	fmuld	%f20,%f26,%f26
	faddd	%f24,qq1,%f24

	fmuld	%f2,%f6,%f6
	ldd	[%l0+8],%f8

	fmuld	%f12,%f16,%f16
	ldd	[%l1+8],%f18

	fmuld	%f22,%f26,%f26
	ldd	[%l2+8],%f28

	faddd	%f6,%f2,%f6
	fmuld	%f0,%f4,%f4
	ldd	[%l0+16],%f30

	faddd	%f16,%f12,%f16
	fmuld	%f10,%f14,%f14
	ldd	[%l1+16],%f32

	faddd	%f26,%f22,%f26
	fmuld	%f20,%f24,%f24
	ldd	[%l2+16],%f34

	fmuld	%f8,%f6,%f0		! s * spoly

	fmuld	%f18,%f16,%f10

	fmuld	%f28,%f26,%f20

	fmuld	%f30,%f4,%f2		! c * cpoly

	fmuld	%f32,%f14,%f12

	fmuld	%f34,%f24,%f22

	fmuld	%f30,%f6,%f6		! c * spoly
	fsubd	%f2,%f0,%f2

	fmuld	%f32,%f16,%f16
	fsubd	%f12,%f10,%f12

	fmuld	%f34,%f26,%f26
	fsubd	%f22,%f20,%f22

	fmuld	%f8,%f4,%f4		! s * cpoly
	faddd	%f2,%f30,%f2
	st	%f2,[%l4]

	fmuld	%f18,%f14,%f14
	faddd	%f12,%f32,%f12
	st	%f3,[%l4+4]

	fmuld	%f28,%f24,%f24
	faddd	%f22,%f34,%f22
	st	%f12,[%l5]

	faddd	%f6,%f4,%f6
	st	%f13,[%l5+4]

	faddd	%f16,%f14,%f16
	st	%f22,[%l6]

	faddd	%f26,%f24,%f26
	st	%f23,[%l6+4]

	faddd	%f6,%f8,%f6

	faddd	%f16,%f18,%f16

	faddd	%f26,%f28,%f26

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f16,%f14
	lda	[%i1]%asi,%f0

	fnegd	%f26,%f24
	lda	[%i1+4]%asi,%f3
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	fmovdl	%fcc0,%f4,%f6		! (hx < -0)? -s : s
	st	%f6,[%o0]

	fmovdl	%fcc1,%f14,%f16
	st	%f16,[%o1]

	fmovdl	%fcc2,%f24,%f26
	st	%f26,[%o2]
	addcc	%i0,-1,%i0

	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop


! medium path: at least one hx > 0x3fe921fb; %f6/%f16/%f26 = x * invpio2
	.align	16
.medium:
	faddd	%f6,c3two51,%f4
	st	%f5,[%fp+nk0]

	faddd	%f16,c3two51,%f14
	st	%f15,[%fp+nk1]

	faddd	%f26,c3two51,%f24
	st	%f25,[%fp+nk2]

	fsubd	%f4,c3two51,%f6

	fsubd	%f14,c3two51,%f16

	fsubd	%f24,c3two51,%f26

	fmuld	%f6,pio2_1,%f2
	ld	[%fp+nk0],%i5		! n

	fmuld	%f16,pio2_1,%f12
	ld	[%fp+nk1],%g5

	fmuld	%f26,pio2_1,%f22
	ld	[%fp+nk2],%o7

	fsubd	%f0,%f2,%f0
	fmuld	%f6,pio2_2,%f4
	mov	%o0,%o4			! if (n & 1) swap ps, pc
	andcc	%i5,1,%g0

	fsubd	%f10,%f12,%f10
	fmuld	%f16,pio2_2,%f14
	movnz	%icc,%l4,%o0
	and	%i5,3,%i5

	fsubd	%f20,%f22,%f20
	fmuld	%f26,pio2_2,%f24
	movnz	%icc,%o4,%l4

	fsubd	%f0,%f4,%f30
	mov	%o1,%o4
	andcc	%g5,1,%g0

	fsubd	%f10,%f14,%f32
	movnz	%icc,%l5,%o1
	and	%g5,3,%g5

	fsubd	%f20,%f24,%f34
	movnz	%icc,%o4,%l5

	fsubd	%f0,%f30,%f0
	fcmple32 %f30,pio2_3,%l0	! x <= pio2_3 iff x < 0
	mov	%o2,%o4
	andcc	%o7,1,%g0

	fsubd	%f10,%f32,%f10
	fcmple32 %f32,pio2_3,%l1
	movnz	%icc,%l6,%o2
	and	%o7,3,%o7

	fsubd	%f20,%f34,%f20
	fcmple32 %f34,pio2_3,%l2
	movnz	%icc,%o4,%l6

	fsubd	%f0,%f4,%f0
	fmuld	%f6,pio2_3,%f6
	add	%i5,1,%o4		! n = (n >> 1) | (((n + 1) ^ l) & 2)
	srl	%i5,1,%i5

	fsubd	%f10,%f14,%f10
	fmuld	%f16,pio2_3,%f16
	xor	%o4,%l0,%o4

	fsubd	%f20,%f24,%f20
	fmuld	%f26,pio2_3,%f26
	and	%o4,2,%o4

	fsubd	%f6,%f0,%f6
	or	%i5,%o4,%i5

	fsubd	%f16,%f10,%f16
	add	%g5,1,%o4
	srl	%g5,1,%g5

	fsubd	%f26,%f20,%f26
	xor	%o4,%l1,%o4

	fsubd	%f30,%f6,%f0		! reduced x
	and	%o4,2,%o4

	fsubd	%f32,%f16,%f10
	or	%g5,%o4,%g5

	fsubd	%f34,%f26,%f20
	add	%o7,1,%o4
	srl	%o7,1,%o7

	fzero	%f38
	xor	%o4,%l2,%o4

	fabsd	%f0,%f2
	and	%o4,2,%o4

	fabsd	%f10,%f12
	or	%o7,%o4,%o7

	fabsd	%f20,%f22
	sethi	%hi(0x3e400000),%o4	! %o4 was used as scratch above

	fnegd	%f38,%f38

	faddd	%f2,c3two44,%f4
	st	%f5,[%fp+nk0]

	faddd	%f12,c3two44,%f14
	st	%f15,[%fp+nk1]

	faddd	%f22,c3two44,%f24
	st	%f25,[%fp+nk2]

	fsubd	%f30,%f0,%f4

	fsubd	%f32,%f10,%f14

	fsubd	%f34,%f20,%f24

	fsubd	%f4,%f6,%f6		! w
	ld	[%fp+nk0],%l0

	fsubd	%f14,%f16,%f16
	ld	[%fp+nk1],%l1

	fsubd	%f24,%f26,%f26
	ld	[%fp+nk2],%l2
	sll	%l0,5,%l0		! k

	fand	%f0,%f38,%f30		! sign bit of x
	ldd	[%l0+%g1],%f4
	sll	%l1,5,%l1

	fand	%f10,%f38,%f32
	ldd	[%l1+%g1],%f14
	sll	%l2,5,%l2

	fand	%f20,%f38,%f34
	ldd	[%l2+%g1],%f24

	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]

	fsubd	%f12,%f14,%f12

	fsubd	%f22,%f24,%f22

	fmuld	%f2,%f2,%f0		! z = x * x
	fxor	%f6,%f30,%f30

	fmuld	%f12,%f12,%f10
	fxor	%f16,%f32,%f32

	fmuld	%f22,%f22,%f20
	fxor	%f26,%f34,%f34

	fmuld	%f0,pp3,%f6

	fmuld	%f10,pp3,%f16

	fmuld	%f20,pp3,%f26

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq3,%f4

	faddd	%f16,pp2,%f16
	fmuld	%f10,qq3,%f14

	faddd	%f26,pp2,%f26
	fmuld	%f20,qq3,%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq2,%f4

	fmuld	%f10,%f16,%f16
	faddd	%f14,qq2,%f14

	fmuld	%f20,%f26,%f26
	faddd	%f24,qq2,%f24

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l0,%g1,%l0

	faddd	%f16,pp1,%f16
	fmuld	%f10,%f14,%f14
	add	%l1,%g1,%l1

	faddd	%f26,pp1,%f26
	fmuld	%f20,%f24,%f24
	add	%l2,%g1,%l2

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	fmuld	%f10,%f16,%f16
	faddd	%f14,qq1,%f14

	fmuld	%f20,%f26,%f26
	faddd	%f24,qq1,%f24

	fmuld	%f2,%f6,%f6
	ldd	[%l0+16],%f8

	fmuld	%f12,%f16,%f16
	ldd	[%l1+16],%f18

	fmuld	%f22,%f26,%f26
	ldd	[%l2+16],%f28

	faddd	%f6,%f30,%f6
	fmuld	%f0,%f4,%f4
	ldd	[%l0+8],%f30

	faddd	%f16,%f32,%f16
	fmuld	%f10,%f14,%f14
	ldd	[%l1+8],%f32

	faddd	%f26,%f34,%f26
	fmuld	%f20,%f24,%f24
	ldd	[%l2+8],%f34

	fmuld	%f8,%f4,%f0		! c * cpoly
	faddd	%f6,%f2,%f6

	fmuld	%f18,%f14,%f10
	faddd	%f16,%f12,%f16

	fmuld	%f28,%f24,%f20
	faddd	%f26,%f22,%f26

	fmuld	%f30,%f6,%f2		! s * spoly

	fmuld	%f32,%f16,%f12

	fmuld	%f34,%f26,%f22

	fmuld	%f8,%f6,%f6		! c * spoly
	fsubd	%f0,%f2,%f2

	fmuld	%f18,%f16,%f16
	fsubd	%f10,%f12,%f12

	fmuld	%f28,%f26,%f26
	fsubd	%f20,%f22,%f22

	fmuld	%f30,%f4,%f4		! s * cpoly
	faddd	%f8,%f2,%f8

	fmuld	%f32,%f14,%f14
	faddd	%f18,%f12,%f18

	fmuld	%f34,%f24,%f24
	faddd	%f28,%f22,%f28

	faddd	%f4,%f6,%f6

	faddd	%f14,%f16,%f16

	faddd	%f24,%f26,%f26

	faddd	%f30,%f6,%f6		! now %f6 = sin |x|, %f8 = cos |x|

	faddd	%f32,%f16,%f16

	faddd	%f34,%f26,%f26

	fnegd	%f8,%f4			! if (n & 1) c = -c
	lda	[%i1]%asi,%l0		! preload next argument
	mov	%i5,%l1

	fnegd	%f18,%f14
	lda	[%i1]%asi,%f0
	sethi	%hi(0x80000000),%i5

	fnegd	%f28,%f24
	lda	[%i1+4]%asi,%f3

	andcc	%l1,1,%g0
	fmovdnz	%icc,%f4,%f8
	st	%f8,[%l4]

	andcc	%g5,1,%g0
	fmovdnz	%icc,%f14,%f18
	st	%f9,[%l4+4]

	andcc	%o7,1,%g0
	fmovdnz	%icc,%f24,%f28
	st	%f18,[%l5]

	fnegd	%f6,%f4			! if (n & 2) s = -s
	st	%f19,[%l5+4]
	andn	%l0,%i5,%l0

	fnegd	%f16,%f14
	st	%f28,[%l6]
	add	%i1,%i2,%i1

	fnegd	%f26,%f24
	st	%f29,[%l6+4]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%g5,2,%g0
	fmovdnz	%icc,%f14,%f16
	st	%f16,[%o1]

	andcc	%o7,2,%g0
	fmovdnz	%icc,%f24,%f26
	st	%f26,[%o2]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop


! loop exit: store the pending low result words of lanes 1 and 2, then call
! __vlibm_vsincos_big with the saved arguments if biguns was set
	.align	16
.end:
	st	%f17,[%o1+4]
	st	%f27,[%o2+4]
	ld	[%fp+biguns],%i5
	tst	%i5			! check for huge arguments remaining
	be,pt	%icc,.exit
! delay slot
	nop
#ifdef __sparcv9
	stx	%o5,[%sp+STACK_BIAS+0xb8]	! 0x4099251e, before %o5 is reused
	ldx	[%fp+xsave],%o1
	ldx	[%fp+ssave],%o3
	ldx	[%fp+csave],%o5
	ldx	[%fp+STACK_BIAS+0xb0],%i5	! copy stridec to outgoing slot
	stx	%i5,[%sp+STACK_BIAS+0xb0]
#else
	st	%o5,[%sp+0x60]		! 0x4099251e, before %o5 is reused
	ld	[%fp+xsave],%o1
	ld	[%fp+ssave],%o3
	ld	[%fp+csave],%o5
	ld	[%fp+0x5c],%i5		! copy stridec to outgoing slot
	st	%i5,[%sp+0x5c]
#endif
	ld	[%fp+nsave],%o0
	ld	[%fp+sxsave],%o2
	ld	[%fp+sssave],%o4
	sra	%o2,0,%o2		! sign-extend for V9
	call	__vlibm_vsincos_big
	sra	%o4,0,%o4		! delay slot

.exit:
	ret
	restore


! fewer than three elements remain: zero the unused lanes and point their
! result pointers at the junk slot, then rejoin the main path at .cont
	.align	16
.last1:
	faddd	%f2,c3two44,%f4
	st	%f17,[%o1+4]
.last1_from_range1:
	mov	0,%l1
	fzeros	%f10
	fzero	%f12
	add	%fp,junk,%o1
	add	%fp,junk,%l5
.last2:
	faddd	%f12,c3two44,%f14
	st	%f27,[%o2+4]
	st	%f5,[%fp+nk0]
	st	%f15,[%fp+nk1]
.last2_from_range2:
	mov	0,%l2
	fzeros	%f20
	fzero	%f22
	add	%fp,junk,%o2
	ba,pt	%icc,.cont
! delay slot
	add	%fp,junk,%l6


! lane 0 argument is tiny, huge, or not finite
	.align	16
.range0:
	cmp	%l0,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l0,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f0
	fmuld	%f2,%f0,%f2
	st	%f2,[%o0]
	st	%f3,[%o0+4]
	st	%f2,[%l3]
	ba,pt	%icc,2f
! delay slot
	st	%f3,[%l3+4]
1:
	fdtoi	%f2,%f4			! raise inexact if not zero
	st	%f0,[%o0]
	st	%f3,[%o0+4]
	sethi	%hi(0x3ff00000),%g5
	st	%g5,[%l3]
	st	%g0,[%l3+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.end
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! s += strides
	add	%l3,%l7,%l3		! c += stridec
	andn	%l1,%i5,%l0		! hx &= ~0x80000000
	fmovs	%f10,%f0
	fmovs	%f13,%f3
	ba,pt	%icc,.loop0
! delay slot
	add	%i1,%i2,%i1		! x += stridex


! lane 1 argument is tiny, huge, or not finite
	.align	16
.range1:
	cmp	%l1,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l1,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f10
	fmuld	%f12,%f10,%f12
	st	%f12,[%o1]
	st	%f13,[%o1+4]
	st	%f12,[%l3]
	ba,pt	%icc,2f
! delay slot
	st	%f13,[%l3+4]
1:
	fdtoi	%f12,%f14		! raise inexact if not zero
	st	%f10,[%o1]
	st	%f13,[%o1+4]
	sethi	%hi(0x3ff00000),%g5
	st	%g5,[%l3]
	st	%g0,[%l3+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1_from_range1
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! s += strides
	add	%l3,%l7,%l3		! c += stridec
	andn	%l2,%i5,%l1		! hx &= ~0x80000000
	fmovs	%f20,%f10
	fmovs	%f23,%f13
	ba,pt	%icc,.loop1
! delay slot
	add	%i1,%i2,%i1		! x += stridex


! lane 2 argument is tiny, huge, or not finite
	.align	16
.range2:
	cmp	%l2,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l2,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f20
	fmuld	%f22,%f20,%f22
	st	%f22,[%o2]
	st	%f23,[%o2+4]
	st	%f22,[%l3]
	ba,pt	%icc,2f
! delay slot
	st	%f23,[%l3+4]
1:
	fdtoi	%f22,%f24		! raise inexact if not zero
	st	%f20,[%o2]
	st	%f23,[%o2+4]
	sethi	%hi(0x3ff00000),%g5
	st	%g5,[%l3]
	st	%g0,[%l3+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2_from_range2
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! s += strides
	add	%l3,%l7,%l3		! c += stridec
	ld	[%i1],%l2
	ld	[%i1],%f20
	ld	[%i1+4],%f23
	andn	%l2,%i5,%l2		! hx &= ~0x80000000
	ba,pt	%icc,.loop2
! delay slot
	add	%i1,%i2,%i1		! x += stridex

	SET_SIZE(__vsincos)
