!
! Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
! Use is subject to license terms.
!
! CDDL HEADER START
!
! The contents of this file are subject to the terms of the
! Common Development and Distribution License, Version 1.0 only
! (the "License").  You may not use this file except in compliance
! with the License.
!
! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
! or http://www.opensolaris.org/os/licensing.
! See the License for the specific language governing permissions
! and limitations under the License.
!
! When distributing Covered Code, include this CDDL HEADER in each
! file and include the License file at usr/src/OPENSOLARIS.LICENSE.
! If applicable, add the following below this CDDL HEADER, with the
! fields enclosed by brackets "[]" replaced with your own identifying
! information: Portions Copyright [yyyy] [name of copyright owner]
!
! CDDL HEADER END
!

	.ident	"%Z%%M%	%I%	%E% SMI"

! /*
!  * This file contains __quad_mag_add and __quad_mag_sub, the core
!  * of the quad precision add and subtract operations.
!  */
! SPARC V9 version, hand-coded in assembly to use 64-bit integer registers.

	.file	"__quad_mag64.s"

#include <sys/asm_linkage.h>

! union longdouble {
!	struct {
!		unsigned int	msw;
!		unsigned int	frac2;
!		unsigned int	frac3;
!		unsigned int	frac4;
!	} l;
!	struct {
!		unsigned long	msll;
!		unsigned long	frac;
!	} ll;
!	long double	d;
! };
!
! /*
!  * __quad_mag_add(x, y, z, fsr)
!  *
!  * Sets *z = *x + *y, rounded according to the rounding mode in *fsr,
!  * and updates the current exceptions in *fsr.  This routine assumes
!  * *x and *y are finite, with the same sign (i.e., an addition of
!  * magnitudes), |*x| >= |*y|, and *z already has its sign bit set.
!  */
! void
! __quad_mag_add(const union longdouble *x, const union longdouble *y,
!	union longdouble *z, unsigned int *fsr)
! {
!	unsigned long	lx, ly, frac, sticky;
!	unsigned int	ex, ey, round, rm;
!	int		e, uflo;
!
!	/* get the leading significand double-words and exponents */
!	ex = (x->ll.msll >> 48) & 0x7fff;
!	lx = x->ll.msll & ~0xffff000000000000ul;
!	if (ex == 0)
!		ex = 1;
!	else
!		lx |= 0x0001000000000000ul;
!
!	ey = (y->ll.msll >> 48) & 0x7fff;
!	ly = y->ll.msll & ~0xffff000000000000ul;
!	if (ey == 0)
!		ey = 1;
!	else
!		ly |= 0x0001000000000000ul;
!
!	/* prenormalize y */
!	e = (int) ex - (int) ey;
!	round = sticky = 0;
!	if (e >= 114) {
!		frac = x->ll.frac;
!		sticky = ly | y->ll.frac;
!	} else {
!		frac = y->ll.frac;
!		if (e >= 64) {
!			sticky = frac & 0x7ffffffffffffffful;
!			round = frac >> 63;
!			frac = ly;
!			ly = 0;
!			e -= 64;
!		}
!		if (e) {
!			sticky |= round | (frac & ((1ul << (e - 1)) - 1));
!			round = (frac >> (e - 1)) & 1;
!			frac = (frac >> e) | (ly << (64 - e));
!			ly >>= e;
!		}
!
!		/* add, propagating carries */
!		frac += x->ll.frac;
!		lx += ly;
!		if (frac < x->ll.frac)
!			lx++;
!
!		/* postnormalize */
!		if (lx >= 0x0002000000000000ul) {
!			sticky |= round;
!			round = frac & 1;
!			frac = (frac >> 1) | (lx << 63);
!			lx >>= 1;
!			ex++;
!		}
!	}
!
!	/* keep track of whether the result before rounding is tiny */
!	uflo = (lx < 0x0001000000000000ul);
!
!	/* get the rounding mode, fudging directed rounding modes
!	   as though the result were positive */
!	rm = *fsr >> 30;
!	if (z->l.msw)
!		rm ^= (rm >> 1);
!
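!	/*
!	 * Note: with the FSR rounding direction encoded as 0 = nearest,
!	 * 1 = toward zero, 2 = toward +inf, and 3 = toward -inf, the
!	 * xor above swaps 2 and 3 and leaves 0 and 1 alone.  Rounding a
!	 * negative result toward -inf is the same as rounding its
!	 * magnitude toward +inf, so the code below can round as though
!	 * the result were positive.
!	 */
!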
!	/* see if we need to round */
!	if (round | sticky) {
!		*fsr |= FSR_NXC;
!
!		/* round up if necessary */
!		if (rm == FSR_RP || (rm == FSR_RN && round &&
!		    (sticky || (frac & 1)))) {
!			if (++frac == 0)
!				if (++lx >= 0x0002000000000000ul) {
!					lx >>= 1;
!					ex++;
!				}
!		}
!	}
!
!	/* check for overflow */
!	if (ex >= 0x7fff) {
!		/* store the default overflowed result */
!		*fsr |= FSR_OFC | FSR_NXC;
!		if (rm == FSR_RN || rm == FSR_RP) {
!			z->l.msw |= 0x7fff0000;
!			z->l.frac2 = 0;
!			z->ll.frac = 0;
!		} else {
!			z->l.msw |= 0x7ffeffff;
!			z->l.frac2 = 0xffffffff;
!			z->ll.frac = 0xfffffffffffffffful;
!		}
!	} else {
!		/* store the result */
!		if (lx >= 0x0001000000000000ul)
!			z->l.msw |= (ex << 16);
!		z->l.msw |= (lx >> 32) & 0xffff;
!		z->l.frac2 = (lx & 0xffffffff);
!		z->ll.frac = frac;
!
!		/* if the pre-rounded result was tiny and underflow trapping
!		   is enabled, simulate underflow */
!		if (uflo && (*fsr & FSR_UFM))
!			*fsr |= FSR_UFC;
!	}
! }

	ENTRY(__quad_mag_add)
	save	%sp,-SA(MINFRAME),%sp

	sethi	%hi(0xffff0000),%g1
	sllx	%g1,32,%g1		! g1 = 0xffff000000000000

	sethi	%hi(0x7fff),%l7
	or	%l7,%lo(0x7fff),%l7	! l7 = 0x7fff

	ldx	[%i0],%o0
	srlx	%o0,48,%l0
	andcc	%l0,%l7,%l0		! l0 = ex
	beq,pn	%icc,1f
	andn	%o0,%g1,%o0		! o0 = lx
	ba,pt	%icc,2f
	sub	%o0,%g1,%o0
1:
	mov	1,%l0
2:

	ldx	[%i1],%o1
	srlx	%o1,48,%l1
	andcc	%l1,%l7,%l1		! l1 = ey
	beq,pn	%icc,1f
	andn	%o1,%g1,%o1		! o1 = ly
	ba,pt	%icc,2f
	sub	%o1,%g1,%o1
1:
	mov	1,%l1
2:

	sub	%l0,%l1,%l1		! l1 = e = ex - ey
	cmp	%l1,114			! see if we need to prenormalize
	bge,pn	%icc,1f
	mov	0,%l6			! l6 = round
	mov	0,%o7			! o7 = sticky
	cmp	%l1,64
	bl,pt	%icc,3f
	ldx	[%i1+8],%o2		! o2 = frac
	sllx	%o2,1,%o7		! lop off high order bit
	srlx	%o2,63,%l6
	mov	%o1,%o2
	mov	0,%o1
	sub	%l1,64,%l1
3:
	tst	%l1
	beq,pn	%icc,4f
	sub	%l1,1,%l2
	mov	1,%o3
	sllx	%o3,%l2,%o3
	sub	%o3,1,%o3
	and	%o3,%o2,%o3
	or	%o3,%l6,%o3
	or	%o7,%o3,%o7
	srlx	%o2,%l2,%o4
	and	%o4,1,%l6
	srlx	%o2,%l1,%o2
	mov	64,%l3
	sub	%l3,%l1,%l3
	sllx	%o1,%l3,%o5
	or	%o2,%o5,%o2
	srlx	%o1,%l1,%o1
4:
	ldx	[%i0+8],%o3
	add	%o2,%o3,%o2		! add, propagating carry
	cmp	%o2,%o3
	bgeu,pt	%xcc,5f
	add	%o0,%o1,%o0
	add	%o0,1,%o0
5:
	srlx	%o0,49,%o5		! if sum carried out, postnormalize
	tst	%o5
	beq,pt	%icc,2f
	nop
	or	%o7,%l6,%o7
	and	%o2,1,%l6
	srlx	%o2,1,%o2
	sllx	%o0,63,%o3
	or	%o2,%o3,%o2
	srlx	%o0,1,%o0
	ba,pt	%icc,2f
	add	%l0,1,%l0
1:
	ldx	[%i0+8],%o2		! (full prenormalization shift case)
	ldx	[%i1+8],%o3
	or	%o1,%o3,%o7
2:

	add	%o0,%g1,%o1		! see if sum is tiny
	srlx	%o1,63,%l2		! l2 = uflo

	ld	[%i3],%i4		! get the rounding mode
	srl	%i4,30,%l3		! l3 = rm
	ld	[%i2],%l4		! l4 = z->l.msw
	tst	%l4
	beq,pn	%icc,1f
	srl	%l3,1,%l5
	xor	%l3,%l5,%l3
1:

	orcc	%o7,%l6,%g0		! see if we need to round
	beq,pn	%xcc,1f
	andcc	%l3,1,%g0
	or	%i4,1,%i4
	bne,pn	%icc,1f
	tst	%l3
	bne,pn	%icc,2f
	tst	%l6
	beq,pn	%icc,1f
	and	%o2,1,%o3
	orcc	%o3,%o7,%g0
	beq,pn	%xcc,1f
	nop
2:
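	! At this point %o0:%o2 holds the unrounded significand lx:frac,
	! %l0 = ex, %l6 = round, %o7 = sticky, %l2 = uflo, %l3 = rm (adjusted
	! for the sign of the result), %l4 = z->l.msw, and %i4 holds the
	! working copy of *fsr.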
	addcc	%o2,1,%o2		! round up and check for carry out
	bne,pt	%xcc,1f
	nop
	add	%o0,1,%o0
	srlx	%o0,49,%o1
	tst	%o1
	beq,pt	%icc,1f
	nop
	srlx	%o0,1,%o0
	add	%l0,1,%l0
1:

	cmp	%l0,%l7			! check for overflow
	bge,pn	%icc,1f
	addcc	%o0,%g1,%g0
	bl,pn	%xcc,2f
	sll	%l0,16,%l1
	or	%l4,%l1,%l4
2:
	sllx	%o0,16,%o1
	srlx	%o1,48,%o1
	or	%l4,%o1,%l4
	st	%l4,[%i2]
	st	%o0,[%i2+4]
	stx	%o2,[%i2+8]
	tst	%l2			! see if we need to raise underflow
	beq,pt	%icc,3f
	srl	%i4,23,%i5
	andcc	%i5,4,%i5
	ba,pt	%icc,3f
	or	%i4,%i5,%i4

1:
	andcc	%l3,1,%g0
	bne,pn	%icc,2f
	or	%i4,9,%i4		! overflow
	sll	%l7,16,%l7		! 7fff00000...
	or	%l4,%l7,%l4
	st	%l4,[%i2]
	st	%g0,[%i2+4]
	ba,pt	%icc,3f
	stx	%g0,[%i2+8]
2:
	mov	-1,%o0			! 7ffeffff...
	sll	%l7,16,%l7
	add	%o0,%l7,%l7
	or	%l4,%l7,%l4
	st	%l4,[%i2]
	st	%o0,[%i2+4]
	stx	%o0,[%i2+8]

3:
	st	%i4,[%i3]
	ret
	restore

	SET_SIZE(__quad_mag_add)

! /*
!  * __quad_mag_sub(x, y, z, fsr)
!  *
!  * Sets *z = *x - *y, rounded according to the rounding mode in *fsr,
!  * and updates the current exceptions in *fsr.  This routine assumes
!  * *x and *y are finite, with opposite signs (i.e., a subtraction of
!  * magnitudes), |*x| >= |*y|, and *z already has its sign bit set.
!  */
! void
! __quad_mag_sub(const union longdouble *x, const union longdouble *y,
!	union longdouble *z, unsigned int *fsr)
! {
!	unsigned long	lx, ly, frac, sticky;
!	unsigned int	ex, ey, gr, borrow, rm;
!	int		e;
!
!	/* get the leading significand double-words and exponents */
!	ex = (x->ll.msll >> 48) & 0x7fff;
!	lx = x->ll.msll & ~0xffff000000000000ul;
!	if (ex == 0)
!		ex = 1;
!	else
!		lx |= 0x0001000000000000ul;
!
!	ey = (y->ll.msll >> 48) & 0x7fff;
!	ly = y->ll.msll & ~0xffff000000000000ul;
!	if (ey == 0)
!		ey = 1;
!	else
!		ly |= 0x0001000000000000ul;
!
!	/* prenormalize y */
!	e = (int) ex - (int) ey;
!	gr = sticky = 0;
!	if (e > 114) {
!		sticky = ly | y->ll.frac;
!		ly = frac = 0;
!	} else {
!		frac = y->ll.frac;
!		if (e >= 64) {
!			gr = frac >> 62;
!			sticky = frac << 2;
!			frac = ly;
!			ly = 0;
!			e -= 64;
!		}
!		if (e > 1) {
!			sticky |= gr | (frac & ((1ul << (e - 2)) - 1));
!			gr = (frac >> (e - 2)) & 3;
!			frac = (frac >> e) | (ly << (64 - e));
!			ly >>= e;
!		} else if (e == 1) {
!			sticky |= (gr & 1);
!			gr = (gr >> 1) | ((frac & 1) << 1);
!			frac = (frac >> 1) | (ly << 63);
!			ly >>= 1;
!		}
!	}
!
!	/* complement guard, round, and sticky as need be */
!	gr <<= 1;
!	if (sticky)
!		gr |= 1;
!	gr = (-gr & 7);
!	if (gr)
!		if (++frac == 0)
!			ly++;
!
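!	/*
!	 * The complement above accounts for the guard, round, and sticky
!	 * bits shifted out of y.  If those bits represent a fraction d/8
!	 * of a unit in the last place kept, then
!	 *	x - (frac + d/8) = (x - (frac + 1)) + (8 - d)/8,
!	 * so we bump the subtrahend by one and keep -gr & 7 as the guard,
!	 * round, and sticky bits of the difference.  For example,
!	 * 1000.000b - 0011.101b is computed as (1000b - 0100b) with .011b
!	 * appended, giving 0100.011b.
!	 */
!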
!	/* subtract, propagating borrows */
!	frac = x->ll.frac - frac;
!	lx -= ly;
!	if (frac > x->ll.frac)
!		lx--;
!
!	/* get the rounding mode */
!	rm = *fsr >> 30;
!
!	/* handle zero result */
!	if (!(lx | frac | gr)) {
!		z->l.msw = ((rm == FSR_RM)? 0x80000000 : 0);
!		z->l.frac2 = z->l.frac3 = z->l.frac4 = 0;
!		return;
!	}
!
!	/* postnormalize */
!	if (lx < 0x0001000000000000ul) {
!		/* if cancellation occurred or the exponent is 1,
!		   the result is exact */
!		if (lx < 0x0000800000000000ul || ex == 1) {
!			if ((lx | (frac & 0xfffe000000000000ul)) == 0 &&
!			    ex > 64) {
!				lx = frac;
!				frac = (unsigned long) gr << 61;
!				gr = 0;
!				ex -= 64;
!			}
!			while (lx < 0x0001000000000000ul && ex > 1) {
!				lx = (lx << 1) | (frac >> 63);
!				frac = (frac << 1) | (gr >> 2);
!				gr = 0;
!				ex--;
!			}
!			if (lx >= 0x0001000000000000ul)
!				z->l.msw |= (ex << 16);
!			z->l.msw |= ((lx >> 32) & 0xffff);
!			z->l.frac2 = (lx & 0xffffffff);
!			z->ll.frac = frac;
!
!			/* if the result is tiny and underflow trapping is
!			   enabled, simulate underflow */
!			if (lx < 0x0001000000000000ul && (*fsr & FSR_UFM))
!				*fsr |= FSR_UFC;
!			return;
!		}
!
!		/* otherwise we only borrowed one place */
!		lx = (lx << 1) | (frac >> 63);
!		frac = (frac << 1) | (gr >> 2);
!		gr &= 3;
!		ex--;
!	}
!	else
!		gr = (gr >> 1) | (gr & 1);
!
!	/* fudge directed rounding modes as though the result were positive */
!	if (z->l.msw)
!		rm ^= (rm >> 1);
!
!	/* see if we need to round */
!	if (gr) {
!		*fsr |= FSR_NXC;
!
!		/* round up if necessary */
!		if (rm == FSR_RP || (rm == FSR_RN && (gr & 2) &&
!		    ((gr & 1) || (frac & 1)))) {
!			if (++frac == 0)
!				if (++lx >= 0x0002000000000000ul) {
!					lx >>= 1;
!					ex++;
!				}
!		}
!	}
!
!	/* store the result */
!	z->l.msw |= (ex << 16) | ((lx >> 32) & 0xffff);
!	z->l.frac2 = (lx & 0xffffffff);
!	z->ll.frac = frac;
! }

	ENTRY(__quad_mag_sub)
	save	%sp,-SA(MINFRAME),%sp

	sethi	%hi(0xffff0000),%g1
	sllx	%g1,32,%g1		! g1 = 0xffff000000000000

	sethi	%hi(0x7fff),%l7
	or	%l7,%lo(0x7fff),%l7	! l7 = 0x7fff

	ldx	[%i0],%o0
	srlx	%o0,48,%l0
	andcc	%l0,%l7,%l0		! l0 = ex
	beq,pn	%icc,1f
	andn	%o0,%g1,%o0		! o0 = lx
	ba,pt	%icc,2f
	sub	%o0,%g1,%o0
1:
	mov	1,%l0
2:

	ldx	[%i1],%o1
	srlx	%o1,48,%l1
	andcc	%l1,%l7,%l1		! l1 = ey
	beq,pn	%icc,1f
	andn	%o1,%g1,%o1		! o1 = ly
	ba,pt	%icc,2f
	sub	%o1,%g1,%o1
1:
	mov	1,%l1
2:

	sub	%l0,%l1,%l1		! l1 = e = ex - ey
	cmp	%l1,114			! see if we need to prenormalize y
	bg,pn	%icc,1f
	mov	0,%l6			! l6 = gr
	mov	0,%o7			! o7 = sticky
	cmp	%l1,64
	bl,pt	%icc,3f
	ldx	[%i1+8],%o2		! o2 = frac
	srlx	%o2,62,%l6
	sllx	%o2,2,%o7		! lop off top two bits
	mov	%o1,%o2
	mov	0,%o1
	sub	%l1,64,%l1
3:
	cmp	%l1,1
	ble,pn	%icc,4f
	sub	%l1,2,%l2		! shift more than one bit
	mov	1,%o3
	sllx	%o3,%l2,%o3
	sub	%o3,1,%o3
	and	%o3,%o2,%o3
	or	%o3,%l6,%o3
	or	%o7,%o3,%o7
	srlx	%o2,%l2,%o4
	and	%o4,3,%l6
	srlx	%o2,%l1,%o2
	mov	64,%l3
	sub	%l3,%l1,%l3
	sllx	%o1,%l3,%o5
	or	%o2,%o5,%o2
	ba,pt	%icc,2f
	srlx	%o1,%l1,%o1
4:
	bne,pn	%icc,2f
	and	%l6,1,%o3		! shift one bit
	or	%o7,%o3,%o7
	and	%o2,1,%o4
	sllx	%o4,1,%o4
	srl	%l6,1,%l6
	or	%l6,%o4,%l6
	srlx	%o2,1,%o2
	sllx	%o1,63,%o5
	or	%o2,%o5,%o2
	ba,pt	%icc,2f
	srlx	%o1,1,%o1
1:
	ldx	[%i1+8],%o3		! (full prenormalization shift case)
	or	%o1,%o3,%o7
	mov	0,%o1
	mov	0,%o2
2:

	tst	%o7			! complement guard, round, and
	beq,pn	%xcc,1f			! sticky as need be
	sll	%l6,1,%l6
	or	%l6,1,%l6
1:
	subcc	%g0,%l6,%l6
	beq,pn	%icc,1f
	and	%l6,7,%l6
	addcc	%o2,1,%o2
	beq,a,pn %xcc,1f
	add	%o1,1,%o1
1:

	ldx	[%i0+8],%o3		! subtract, propagating borrows
	sub	%o3,%o2,%o2
	cmp	%o3,%o2
	bgeu,pt	%xcc,5f
	sub	%o0,%o1,%o0
	sub	%o0,1,%o0
5:

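	! At this point %o0:%o2 holds the raw difference lx:frac, %l0 = ex,
	! and %l6 holds the complemented guard, round, and sticky bits.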
	ld	[%i3],%i4		! get the rounding mode
	srl	%i4,30,%l3		! l3 = rm

	or	%o0,%o2,%o1		! look for zero result
	orcc	%o1,%l6,%g0
	bne,pt	%xcc,1f
	srl	%l3,1,%l4
	and	%l3,%l4,%l4
	sll	%l4,31,%l4
	st	%l4,[%i2]
	st	%g0,[%i2+4]
	stx	%g0,[%i2+8]
	ret
	restore

1:
	addcc	%o0,%g1,%g0		! postnormalize
	bl,pt	%xcc,1f
	ld	[%i2],%l4		! l4 = z->l.msw
	and	%l6,1,%l5		! (no cancellation or borrow case)
	srl	%l6,1,%l6
	ba,pt	%icc,2f
	or	%l6,%l5,%l6
1:
	srax	%g1,1,%o7
	addcc	%o0,%o7,%g0
	bl,pn	%xcc,1f
	cmp	%l0,1
	beq,pt	%icc,1f
	srlx	%o2,63,%o3		! borrowed one place
	sllx	%o0,1,%o0
	or	%o0,%o3,%o0
	srl	%l6,2,%o4
	sllx	%o2,1,%o2
	or	%o2,%o4,%o2
	and	%l6,3,%l6
	ba,pt	%icc,2f
	sub	%l0,1,%l0
1:
	srlx	%o2,49,%o3		! cancellation or tiny result
	orcc	%o0,%o3,%g0
	bne,pt	%xcc,1f
	cmp	%l0,64
	ble,pn	%icc,1f
	nop
	mov	%o2,%o0
	sllx	%l6,61,%o2
	mov	0,%l6
	sub	%l0,64,%l0
1:
	addcc	%o0,%g1,%g0		! normalization loop
	bge,pn	%xcc,1f
	cmp	%l0,1
	ble,pn	%icc,1f
	srl	%l6,2,%l6
	srlx	%o2,63,%o3
	sllx	%o0,1,%o0
	or	%o0,%o3,%o0
	sllx	%o2,1,%o2
	or	%o2,%l6,%o2
	ba,pt	%icc,1b
	sub	%l0,1,%l0
1:
	sllx	%o0,16,%o1
	srlx	%o1,48,%l5
	or	%l4,%l5,%l4
	addcc	%o0,%g1,%g0		! see if result is tiny
	bl,pn	%xcc,1f
	sll	%l0,16,%l5
	or	%l4,%l5,%l4
1:
	st	%l4,[%i2]
	st	%o0,[%i2+4]
	bge,pt	%xcc,1f
	stx	%o2,[%i2+8]
	srl	%i4,23,%i5
	andcc	%i5,4,%g0		! see if we need to raise underflow
	beq,pt	%icc,1f
	or	%i4,4,%i4
	st	%i4,[%i3]
1:
	ret
	restore

2:
	tst	%l4			! fudge directed rounding modes
	beq,pn	%icc,1f
	srl	%l3,1,%l5
	xor	%l3,%l5,%l3
1:

	tst	%l6			! see if we need to round
	beq,pn	%icc,1f
	or	%i4,1,%i4
	st	%i4,[%i3]
	andcc	%l3,1,%g0
	bne,pn	%icc,1f
	tst	%l3
	bne,pn	%icc,2f
	andcc	%l6,2,%g0
	beq,pn	%icc,1f
	or	%l6,%o2,%o3
	andcc	%o3,1,%o3
	beq,pn	%xcc,1f
	nop
2:
	addcc	%o2,1,%o2		! round up and check for carry
	bne,pt	%xcc,1f
	nop
	add	%o0,1,%o0
	srlx	%o0,49,%o1
	tst	%o1
	beq,pt	%icc,1f
	nop
	srlx	%o0,1,%o0
	add	%l0,1,%l0
1:

	sllx	%o0,16,%o1
	srlx	%o1,48,%o1
	or	%l4,%o1,%l4
	sll	%l0,16,%l5
	or	%l4,%l5,%l4
	st	%l4,[%i2]
	st	%o0,[%i2+4]
	stx	%o2,[%i2+8]
	ret
	restore

	SET_SIZE(__quad_mag_sub)
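
! /*
!  * Sketch of a hypothetical caller (not part of this file): a quad
!  * precision add entry point would be expected to screen out non-finite
!  * operands, order the operands so that |*x| >= |*y|, store the result's
!  * sign in z->l.msw, and then dispatch on the operand signs, roughly:
!  *
!  *	if ((x->l.msw ^ y->l.msw) & 0x80000000)
!  *		__quad_mag_sub(x, y, z, fsr);
!  *	else
!  *		__quad_mag_add(x, y, z, fsr);
!  */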