/*
 *  linux/arch/arm/vfp/vfpdouble.c
 *
 * This code is derived in part from John R. Hauser's SoftFloat library, which
 * carries the following notice:
 *
 * ===========================================================================
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 * ===========================================================================
 */
#include <linux/kernel.h>
#include <linux/bitops.h>

#include <asm/div64.h>
#include <asm/ptrace.h>
#include <asm/vfp.h>

#include "vfpinstr.h"
#include "vfp.h"

static struct vfp_double vfp_double_default_qnan = {
	.exponent	= 2047,
	.sign		= 0,
	.significand	= VFP_DOUBLE_SIGNIFICAND_QNAN,
};

static void vfp_double_dump(const char *str, struct vfp_double *d)
{
	pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
		 str, d->sign != 0, d->exponent, d->significand);
}

static void vfp_double_normalise_denormal(struct vfp_double *vd)
{
	int bits = 31 - fls(vd->significand >> 32);
	if (bits == 31)
		bits = 62 - fls(vd->significand);

	vfp_double_dump("normalise_denormal: in", vd);

	if (bits) {
		vd->exponent -= bits - 1;
		vd->significand <<= bits;
	}

	vfp_double_dump("normalise_denormal: out", vd);
}

u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
{
	u64 significand, incr;
	int exponent, shift, underflow;
	u32 rmode;

	vfp_double_dump("pack: in", vd);

	/*
	 * Infinities and NaNs are a special case.
	 */
	if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
		goto pack;

	/*
	 * Special-case zero.
	 */
	if (vd->significand == 0) {
		vd->exponent = 0;
		goto pack;
	}

	exponent = vd->exponent;
	significand = vd->significand;

	shift = 32 - fls(significand >> 32);
	if (shift == 32)
		shift = 64 - fls(significand);
	if (shift) {
		exponent -= shift;
		significand <<= shift;
	}

#ifdef DEBUG
	vd->exponent = exponent;
	vd->significand = significand;
	vfp_double_dump("pack: normalised", vd);
#endif

	/*
	 * Tiny number?
	 */
	underflow = exponent < 0;
	if (underflow) {
		significand = vfp_shiftright64jamming(significand, -exponent);
		exponent = 0;
#ifdef DEBUG
		vd->exponent = exponent;
		vd->significand = significand;
		vfp_double_dump("pack: tiny number", vd);
#endif
		if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
			underflow = 0;
	}

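	/*
	 * How the increment below encodes the rounding mode (the worked
	 * figures assume VFP_DOUBLE_LOW_BITS == 10, i.e. the eventual
	 * LSB of the result is bit 11 of 'significand'):
	 *
	 * Round-to-nearest adds half an LSB (1 << 10).  When the
	 * eventual LSB is currently 0, one is knocked off the increment
	 * so that a value exactly half-way (low bits == 0x400) does not
	 * carry into bit 11 - ties round to even.  Round-to-zero adds
	 * nothing.  Rounding towards +/- infinity adds just under one
	 * LSB (0x7ff) when the operand's sign matches the rounding
	 * direction, so any non-zero discarded bits force a round-up.
	 */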
	/*
	 * Select rounding increment.
	 */
	incr = 0;
	rmode = fpscr & FPSCR_RMODE_MASK;

	if (rmode == FPSCR_ROUND_NEAREST) {
		incr = 1ULL << VFP_DOUBLE_LOW_BITS;
		if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
			incr -= 1;
	} else if (rmode == FPSCR_ROUND_TOZERO) {
		incr = 0;
	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
		incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;

	pr_debug("VFP: rounding increment = 0x%08llx\n", incr);

	/*
	 * Is our rounding going to overflow?
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vd->exponent = exponent;
		vd->significand = significand;
		vfp_double_dump("pack: overflow", vd);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Infinity?
	 */
	if (exponent >= 2046) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			vd->exponent = 2045;
			vd->significand = 0x7fffffffffffffffULL;
		} else {
			vd->exponent = 2047;		/* infinity */
			vd->significand = 0;
		}
	} else {
		if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
			exponent = 0;
		if (exponent || significand > 0x8000000000000000ULL)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vd->exponent = exponent;
		vd->significand = significand >> 1;
	}

 pack:
	vfp_double_dump("pack: final", vd);
	{
		s64 d = vfp_double_pack(vd);
		pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
			 dd, d, exceptions);
		vfp_put_double(dd, d);
	}
	return exceptions;
}

/*
 * Propagate the NaN, setting exceptions if it is signalling.  'n'
 * is always a NaN.  'm' may be a number, NaN or infinity.
 */
static u32
vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
		  struct vfp_double *vdm, u32 fpscr)
{
	struct vfp_double *nan;
	int tn, tm = 0;

	tn = vfp_double_type(vdn);

	if (vdm)
		tm = vfp_double_type(vdm);

	if (fpscr & FPSCR_DEFAULT_NAN)
		/*
		 * Default NaN mode - always returns a quiet NaN
		 */
		nan = &vfp_double_default_qnan;
	else {
		/*
		 * Contemporary mode - select the first signalling
		 * NaN, or if neither are signalling, the first
		 * quiet NaN.
		 */
		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
			nan = vdn;
		else
			nan = vdm;
		/*
		 * Make the NaN quiet.
		 */
		nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
	}

	*vdd = *nan;

	/*
	 * If one was a signalling NaN, raise invalid operation.
	 */
	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}
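
/*
 * To summarise the selection above: a signalling NaN in 'n' wins, then
 * a signalling NaN in 'm', then a quiet NaN in 'n', then a quiet NaN
 * in 'm'.  Whichever is selected is returned with its quiet bit forced
 * on, and FPSCR_IOC is raised if either operand was signalling.
 */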

/*
 * Extended operations
 */
static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr)
{
	vfp_put_double(dd, vfp_double_packed_abs(vfp_get_double(dm)));
	return 0;
}

static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr)
{
	vfp_put_double(dd, vfp_get_double(dm));
	return 0;
}

static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr)
{
	vfp_put_double(dd, vfp_double_packed_negate(vfp_get_double(dm)));
	return 0;
}

static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm, vdd;
	int ret, tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	tm = vfp_double_type(&vdm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_double *vdp = &vdd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
		else if (vdm.sign == 0) {
 sqrt_copy:
			vdp = &vdm;
			ret = 0;
		} else {
 sqrt_invalid:
			vdp = &vfp_double_default_qnan;
			ret = FPSCR_IOC;
		}
		vfp_put_double(dd, vfp_double_pack(vdp));
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number
	 */
	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vdm.sign)
		goto sqrt_invalid;

	vfp_double_dump("sqrt", &vdm);

	/*
	 * Estimate the square root.
	 */
	vdd.sign = 0;
	vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
	vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;

	vfp_double_dump("sqrt estimate1", &vdd);

	vdm.significand >>= 1 + (vdm.exponent & 1);
	vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);

	vfp_double_dump("sqrt estimate2", &vdd);

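	/*
	 * The two estimates above leave the result within a few units
	 * in the last place of the true square root.  If it is close
	 * to a rounding boundary (low bits <= 5), resolve it exactly:
	 * compute the 128-bit remainder m - q*q, and step q down while
	 * the remainder is negative (each step adds back 2*q + 1).
	 * A non-zero final remainder is folded into the sticky bit so
	 * that the rounding in normaliseround remains correct.
	 */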
	/*
	 * And now adjust.
	 */
	if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
		if (vdd.significand < 2) {
			vdd.significand = ~0ULL;
		} else {
			u64 termh, terml, remh, reml;
			vdm.significand <<= 2;
			mul64to128(&termh, &terml, vdd.significand, vdd.significand);
			sub128(&remh, &reml, vdm.significand, 0, termh, terml);
			while ((s64)remh < 0) {
				vdd.significand -= 1;
				shift64left(&termh, &terml, vdd.significand);
				terml |= 1;
				add128(&remh, &reml, remh, reml, termh, terml);
			}
			vdd.significand |= (remh | reml) != 0;
		}
	}
	vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);

	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt");
}

/*
 * Equal	:= ZC
 * Less than	:= N
 * Greater than	:= C
 * Unordered	:= CV
 */
static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr)
{
	s64 d, m;
	u32 ret = 0;

	m = vfp_get_double(dm);
	if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	d = vfp_get_double(dd);
	if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (ret == 0) {
		if (d == m || vfp_double_packed_abs(d | m) == 0) {
			/*
			 * equal (the abs test also catches +0 == -0)
			 */
			ret |= FPSCR_Z | FPSCR_C;
		} else if (vfp_double_packed_sign(d ^ m)) {
			/*
			 * different signs
			 */
			if (vfp_double_packed_sign(d))
				/*
				 * d is negative, so d < m
				 */
				ret |= FPSCR_N;
			else
				/*
				 * d is positive, so d > m
				 */
				ret |= FPSCR_C;
		} else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
			/*
			 * d < m
			 */
			ret |= FPSCR_N;
		} else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
			/*
			 * d > m
			 */
			ret |= FPSCR_C;
		}
	}

	return ret;
}

static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 0, dm, fpscr);
}

static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 1, dm, fpscr);
}

static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr);
}

static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr);
}

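/*
 * Convert double to single.  The exponent is re-biased from 1023 to
 * 127, and the significand is narrowed to its top 32 bits with any
 * discarded low bits jammed into bit 0 (the sticky bit), so that
 * vfp_single_normaliseround() can still round correctly.
 */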
static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	struct vfp_single vsd;
	int tm;
	u32 exceptions = 0;

	vfp_double_unpack(&vdm, vfp_get_double(dm));

	tm = vfp_double_type(&vdm);

	/*
	 * If we have a signalling NaN, signal invalid operation.
	 */
	if (tm == VFP_SNAN)
		exceptions = FPSCR_IOC;

	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

	vsd.sign = vdm.sign;
	vsd.significand = vfp_hi64to32jamming(vdm.significand);

	/*
	 * If we have an infinity or a NaN, the exponent must be 255.
	 */
	if (tm & (VFP_INFINITY|VFP_NAN)) {
		vsd.exponent = 255;
		if (tm & VFP_NAN)
			vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
		goto pack_nan;
	} else if (tm & VFP_ZERO)
		vsd.exponent = 0;
	else
		vsd.exponent = vdm.exponent - (1023 - 127);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts");

 pack_nan:
	vfp_put_float(sd, vfp_single_pack(&vsd));
	return exceptions;
}

static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 m = vfp_get_float(dm);

	vdm.sign = 0;
	vdm.exponent = 1023 + 63 - 1;
	vdm.significand = (u64)m;

	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito");
}

static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 m = vfp_get_float(dm);

	vdm.sign = (m & 0x80000000) >> 16;
	vdm.exponent = 1023 + 63 - 1;
	vdm.significand = vdm.sign ? -m : m;	/* |m|; -m also handles 0x80000000 */

	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito");
}

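/*
 * Convert to a 32-bit integer.  In the in-range case below, the
 * significand is split into the integer part 'd' and the discarded
 * fraction 'rem', with the fraction left-aligned at bit 63 of a u64.
 * The rounding increment is expressed in the same fixed-point form
 * (half == 1 << 63, just-under-one == ~0), so a carry out of the
 * 64-bit addition - detected as (rem + incr) < rem - means "round
 * the integer up".
 */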
static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_double_type(&vdm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN)
		vdm.sign = 0;

	if (vdm.exponent >= 1023 + 32) {
		d = vdm.sign ? 0 : 0xffffffff;
		exceptions = FPSCR_IOC;
	} else if (vdm.exponent >= 1023 - 1) {
		int shift = 1023 + 63 - vdm.exponent;
		u64 rem, incr = 0;

		/*
		 * 2^0 <= m < 2^32-2^8
		 */
		d = (vdm.significand << 1) >> shift;
		rem = vdm.significand << (65 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x8000000000000000ULL;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
			incr = ~0ULL;
		}

		if ((rem + incr) < rem) {
			if (d < 0xffffffff)
				d += 1;
			else
				exceptions |= FPSCR_IOC;
		}

		if (d && vdm.sign) {
			d = 0;
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;
	} else {
		d = 0;
		if (vdm.exponent | vdm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
				d = 0;
				exceptions |= FPSCR_IOC;
			}
		}
	}

	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(sd, d);

	return exceptions;
}

static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr)
{
	return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO);
}

static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr)
{
	struct vfp_double vdm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	vfp_double_dump("VDM", &vdm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_double_type(&vdm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN) {
		d = 0;
		exceptions |= FPSCR_IOC;
	} else if (vdm.exponent >= 1023 + 32) {
		d = 0x7fffffff;
		if (vdm.sign)
			d = ~d;
		exceptions |= FPSCR_IOC;
	} else if (vdm.exponent >= 1023 - 1) {
		int shift = 1023 + 63 - vdm.exponent;	/* 58 */
		u64 rem, incr = 0;

		d = (vdm.significand << 1) >> shift;
		rem = vdm.significand << (65 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x8000000000000000ULL;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
			incr = ~0ULL;
		}

		if ((rem + incr) < rem && d < 0xffffffff)
			d += 1;
		if (d > 0x7fffffff + (vdm.sign != 0)) {
			d = 0x7fffffff + (vdm.sign != 0);
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;

		if (vdm.sign)
			d = -d;
	} else {
		d = 0;
		if (vdm.exponent | vdm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
				d = -1;
		}
	}

	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(sd, (s32)d);

	return exceptions;
}

static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
{
	return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO);
}

static u32 (* const fop_extfns[32])(int dd, int unused, int dm, u32 fpscr) = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= vfp_double_fcpy,
	[FEXT_TO_IDX(FEXT_FABS)]	= vfp_double_fabs,
	[FEXT_TO_IDX(FEXT_FNEG)]	= vfp_double_fneg,
	[FEXT_TO_IDX(FEXT_FSQRT)]	= vfp_double_fsqrt,
	[FEXT_TO_IDX(FEXT_FCMP)]	= vfp_double_fcmp,
	[FEXT_TO_IDX(FEXT_FCMPE)]	= vfp_double_fcmpe,
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= vfp_double_fcmpz,
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= vfp_double_fcmpez,
	[FEXT_TO_IDX(FEXT_FCVT)]	= vfp_double_fcvts,
	[FEXT_TO_IDX(FEXT_FUITO)]	= vfp_double_fuito,
	[FEXT_TO_IDX(FEXT_FSITO)]	= vfp_double_fsito,
	[FEXT_TO_IDX(FEXT_FTOUI)]	= vfp_double_ftoui,
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= vfp_double_ftouiz,
	[FEXT_TO_IDX(FEXT_FTOSI)]	= vfp_double_ftosi,
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= vfp_double_ftosiz,
};

static u32
vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
			  struct vfp_double *vdm, u32 fpscr)
{
	struct vfp_double *vdp;
	u32 exceptions = 0;
	int tn, tm;

	tn = vfp_double_type(vdn);
	tm = vfp_double_type(vdm);

	if (tn & tm & VFP_INFINITY) {
		/*
		 * Two infinities.  Are they different signs?
		 */
		if (vdn->sign ^ vdm->sign) {
			/*
			 * different signs -> invalid
			 */
			exceptions = FPSCR_IOC;
			vdp = &vfp_double_default_qnan;
		} else {
			/*
			 * same signs -> valid
			 */
			vdp = vdn;
		}
	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
		/*
		 * One infinity and one number -> infinity
		 */
		vdp = vdn;
	} else {
		/*
		 * 'n' is a NaN of some type
		 */
		return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
	}
	*vdd = *vdp;
	return exceptions;
}

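/*
 * Add two unpacked doubles.  The smaller-magnitude operand is shifted
 * right to align it with the larger one, with every shifted-out bit
 * jammed into the lowest bit so that no inexactness is lost.  When the
 * signs differ this is really a subtraction: a negative result flips
 * the destination sign, and an exactly-zero result takes its sign from
 * the rounding mode (-0 only when rounding towards minus infinity).
 */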
static u32
vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
	       struct vfp_double *vdm, u32 fpscr)
{
	u32 exp_diff;
	u64 m_sig;

	if (vdn->significand & (1ULL << 63) ||
	    vdm->significand & (1ULL << 63)) {
		pr_info("VFP: bad FP values in %s\n", __func__);
		vfp_double_dump("VDN", vdn);
		vfp_double_dump("VDM", vdm);
	}

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vdn->exponent < vdm->exponent) {
		struct vfp_double *t = vdn;
		vdn = vdm;
		vdm = t;
	}

	/*
	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
	 * infinity or a NaN here.
	 */
	if (vdn->exponent == 2047)
		return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);

	/*
	 * We have two proper numbers, where 'vdn' is the larger magnitude.
	 *
	 * Copy 'n' to 'd' before doing the arithmetic.
	 */
	*vdd = *vdn;

	/*
	 * Align 'm' with the result.
	 */
	exp_diff = vdn->exponent - vdm->exponent;
	m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);

	/*
	 * If the signs are different, we are really subtracting.
	 */
	if (vdn->sign ^ vdm->sign) {
		m_sig = vdn->significand - m_sig;
		if ((s64)m_sig < 0) {
			vdd->sign = vfp_sign_negate(vdd->sign);
			m_sig = -m_sig;
		} else if (m_sig == 0) {
			vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
		}
	} else {
		m_sig += vdn->significand;
	}
	vdd->significand = m_sig;

	return 0;
}

static u32
vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
		    struct vfp_double *vdm, u32 fpscr)
{
	vfp_double_dump("VDN", vdn);
	vfp_double_dump("VDM", vdm);

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vdn->exponent < vdm->exponent) {
		struct vfp_double *t = vdn;
		vdn = vdm;
		vdm = t;
		pr_debug("VFP: swapping M <-> N\n");
	}

	vdd->sign = vdn->sign ^ vdm->sign;

	/*
	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
	 */
	if (vdn->exponent == 2047) {
		if (vdn->significand || (vdm->exponent == 2047 && vdm->significand))
			return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
		if ((vdm->exponent | vdm->significand) == 0) {
			*vdd = vfp_double_default_qnan;
			return FPSCR_IOC;
		}
		vdd->exponent = vdn->exponent;
		vdd->significand = 0;
		return 0;
	}

	/*
	 * If 'm' is zero, the result is always zero.  In this case,
	 * 'n' may be zero or a number, but it doesn't matter which.
	 */
	if ((vdm->exponent | vdm->significand) == 0) {
		vdd->exponent = 0;
		vdd->significand = 0;
		return 0;
	}

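	/*
	 * Both significands arrive with their leading bit at bit 62
	 * (the sanity check in vfp_double_add() relies on bit 63 being
	 * clear), so each carries its mantissa scaled up by 2^62.
	 * vfp_hi64multiply64() returns the 128-bit product scaled down
	 * by 2^64; the exponent arithmetic below re-biases and absorbs
	 * that scaling, and normaliseround supplies the final shift.
	 */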
	/*
	 * We add 2 to the destination exponent for the same reason
	 * as the addition case - though this time we have +1 from
	 * each input operand.
	 */
	vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
	vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);

	vfp_double_dump("VDD", vdd);
	return 0;
}

#define NEG_MULTIPLY	(1 << 0)
#define NEG_SUBTRACT	(1 << 1)

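/*
 * The four multiply-accumulate operations below all go through
 * vfp_double_multiply_accumulate() with a combination of these flags:
 *
 *	0				dd =  dd + (dn * dm)	(fmac)
 *	NEG_MULTIPLY			dd =  dd - (dn * dm)	(fnmac)
 *	NEG_SUBTRACT			dd = -dd + (dn * dm)	(fmsc)
 *	NEG_SUBTRACT | NEG_MULTIPLY	dd = -dd - (dn * dm)	(fnmsc)
 */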
static u32
vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
{
	struct vfp_double vdd, vdp, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
	if (negate & NEG_MULTIPLY)
		vdp.sign = vfp_sign_negate(vdp.sign);

	vfp_double_unpack(&vdn, vfp_get_double(dd));
	if (negate & NEG_SUBTRACT)
		vdn.sign = vfp_sign_negate(vdn.sign);

	exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func);
}

/*
 * Standard operations
 */

/*
 * dd = dd + (dn * dm)
 */
static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac");
}

/*
 * dd = dd - (dn * dm)
 */
static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
}

/*
 * dd = -dd + (dn * dm)
 */
static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
}

/*
 * dd = -dd - (dn * dm)
 */
static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr)
{
	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
}

/*
 * dd = dn * dm
 */
static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul");
}

/*
 * dd = -(dn * dm)
 */
static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
	vdd.sign = vfp_sign_negate(vdd.sign);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul");
}

/*
 * dd = dn + dm
 */
static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd");
}

/*
 * dd = dn - dm
 */
static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	if (vdn.exponent == 0 && vdn.significand)
		vfp_double_normalise_denormal(&vdn);

	vfp_double_unpack(&vdm, vfp_get_double(dm));
	if (vdm.exponent == 0 && vdm.significand)
		vfp_double_normalise_denormal(&vdm);

	/*
	 * Subtraction is like addition, but with a negated operand.
	 */
	vdm.sign = vfp_sign_negate(vdm.sign);

	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);

	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub");
}

/*
 * dd = dn / dm
 */
static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
{
	struct vfp_double vdd, vdn, vdm;
	u32 exceptions = 0;
	int tm, tn;

	vfp_double_unpack(&vdn, vfp_get_double(dn));
	vfp_double_unpack(&vdm, vfp_get_double(dm));

	vdd.sign = vdn.sign ^ vdm.sign;

	tn = vfp_double_type(&vdn);
	tm = vfp_double_type(&vdm);

	/*
	 * Is n a NaN?
	 */
	if (tn & VFP_NAN)
		goto vdn_nan;

	/*
	 * Is m a NaN?
	 */
	if (tm & VFP_NAN)
		goto vdm_nan;

	/*
	 * If n and m are infinity, the result is invalid
	 * If n and m are zero, the result is invalid
	 */
	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
		goto invalid;

	/*
	 * If n is infinity, the result is infinity
	 */
	if (tn & VFP_INFINITY)
		goto infinity;

	/*
	 * If m is zero, raise div0 exceptions
	 */
	if (tm & VFP_ZERO)
		goto divzero;

	/*
	 * If m is infinity, or n is zero, the result is zero
	 */
	if (tm & VFP_INFINITY || tn & VFP_ZERO)
		goto zero;

	if (tn & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdn);
	if (tm & VFP_DENORMAL)
		vfp_double_normalise_denormal(&vdm);

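	/*
	 * The quotient exponent is the difference of the operand
	 * exponents, re-biased; the dividend is pre-shifted so that the
	 * estimated quotient significand lands in the range the packing
	 * code expects.  As in fsqrt, an estimate that is close to a
	 * rounding boundary (low bits <= 2) is checked against the
	 * exact 128-bit remainder, stepping the quotient down while the
	 * remainder is negative, and any non-zero remainder is folded
	 * into the sticky bit.
	 */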
	/*
	 * Ok, we have two numbers, we can perform division.
	 */
	vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
	vdm.significand <<= 1;
	if (vdm.significand <= (2 * vdn.significand)) {
		vdn.significand >>= 1;
		vdd.exponent++;
	}
	vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
	if ((vdd.significand & 0x1ff) <= 2) {
		u64 termh, terml, remh, reml;
		mul64to128(&termh, &terml, vdm.significand, vdd.significand);
		sub128(&remh, &reml, vdn.significand, 0, termh, terml);
		while ((s64)remh < 0) {
			vdd.significand -= 1;
			add128(&remh, &reml, remh, reml, 0, vdm.significand);
		}
		vdd.significand |= (reml != 0);
	}
	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv");

 vdn_nan:
	exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
 pack:
	vfp_put_double(dd, vfp_double_pack(&vdd));
	return exceptions;

 vdm_nan:
	exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
	goto pack;

 zero:
	vdd.exponent = 0;
	vdd.significand = 0;
	goto pack;

 divzero:
	exceptions = FPSCR_DZC;
 infinity:
	vdd.exponent = 2047;
	vdd.significand = 0;
	goto pack;

 invalid:
	vfp_put_double(dd, vfp_double_pack(&vfp_double_default_qnan));
	return FPSCR_IOC;
}

static u32 (* const fop_fns[16])(int dd, int dn, int dm, u32 fpscr) = {
	[FOP_TO_IDX(FOP_FMAC)]	= vfp_double_fmac,
	[FOP_TO_IDX(FOP_FNMAC)]	= vfp_double_fnmac,
	[FOP_TO_IDX(FOP_FMSC)]	= vfp_double_fmsc,
	[FOP_TO_IDX(FOP_FNMSC)]	= vfp_double_fnmsc,
	[FOP_TO_IDX(FOP_FMUL)]	= vfp_double_fmul,
	[FOP_TO_IDX(FOP_FNMUL)]	= vfp_double_fnmul,
	[FOP_TO_IDX(FOP_FADD)]	= vfp_double_fadd,
	[FOP_TO_IDX(FOP_FSUB)]	= vfp_double_fsub,
	[FOP_TO_IDX(FOP_FDIV)]	= vfp_double_fdiv,
};

#define FREG_BANK(x)	((x) & 0x0c)
#define FREG_IDX(x)	((x) & 3)

u32 vfp_double_cpdo(u32 inst, u32 fpscr)
{
	u32 op = inst & FOP_MASK;
	u32 exceptions = 0;
	unsigned int dd = vfp_get_dd(inst);
	unsigned int dn = vfp_get_dn(inst);
	unsigned int dm = vfp_get_dm(inst);
	unsigned int vecitr, veclen, vecstride;
	u32 (*fop)(int, int, s32, u32);

	veclen = fpscr & FPSCR_LENGTH_MASK;
	vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK)) * 2;

	/*
	 * If destination bank is zero, vector length is always '1'.
	 * ARM DDI0100F C5.1.3, C5.3.2.
	 */
	if (FREG_BANK(dd) == 0)
		veclen = 0;

	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
		 (veclen >> FPSCR_LENGTH_BIT) + 1);

	fop = (op == FOP_EXT) ? fop_extfns[FEXT_TO_IDX(inst)] : fop_fns[FOP_TO_IDX(op)];
	if (!fop)
		goto invalid;

	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
		u32 except;

		if (op == FOP_EXT)
			pr_debug("VFP: itr%d (d%u) = op[%u] (d%u)\n",
				 vecitr >> FPSCR_LENGTH_BIT,
				 dd, dn, dm);
		else
			pr_debug("VFP: itr%d (d%u) = (d%u) op[%u] (d%u)\n",
				 vecitr >> FPSCR_LENGTH_BIT,
				 dd, dn, FOP_TO_IDX(op), dm);

		except = fop(dd, dn, dm, fpscr);
		pr_debug("VFP: itr%d: exceptions=%08x\n",
			 vecitr >> FPSCR_LENGTH_BIT, except);

		exceptions |= except;

		/*
		 * This ensures that comparisons only operate on scalars;
		 * comparisons always return with one FPSCR status bit set.
		 */
		if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
			break;

		/*
		 * CHECK: It appears to be undefined whether we stop when
		 * we encounter an exception.  We continue.
		 */

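		/*
		 * Step on to the next register in each operand's bank.
		 * An 'm' operand in bank 0 is a scalar and is not
		 * stepped: the same value is applied to every
		 * iteration of the vector operation.
		 */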
		dd = FREG_BANK(dd) + ((FREG_IDX(dd) + vecstride) & 6);
		dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 6);
		if (FREG_BANK(dm) != 0)
			dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 6);
	}
	return exceptions;

 invalid:
	return ~0;
}