/*
 *  linux/arch/arm/vfp/vfpdouble.c
 *
 * This code is derived in part from John R. Hauser's SoftFloat library,
 * which carries the following notice:
 *
 * ===========================================================================
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 * ===========================================================================
 */
#include <linux/kernel.h>
#include <linux/bitops.h>

#include <asm/div64.h>
#include <asm/ptrace.h>
#include <asm/vfp.h>

#include "vfpinstr.h"
#include "vfp.h"

static struct vfp_double vfp_double_default_qnan = {
        .exponent       = 2047,
        .sign           = 0,
        .significand    = VFP_DOUBLE_SIGNIFICAND_QNAN,
};

static void vfp_double_dump(const char *str, struct vfp_double *d)
{
        pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
                 str, d->sign != 0, d->exponent, d->significand);
}

static void vfp_double_normalise_denormal(struct vfp_double *vd)
{
        int bits = 31 - fls(vd->significand >> 32);
        if (bits == 31)
                bits = 62 - fls(vd->significand);

        vfp_double_dump("normalise_denormal: in", vd);

        if (bits) {
                vd->exponent -= bits - 1;
                vd->significand <<= bits;
        }

        vfp_double_dump("normalise_denormal: out", vd);
}
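/*
 * Round the unpacked result in 'vd' according to the FPSCR rounding
 * mode and repack it into register 'dd'.  While this routine runs, the
 * significand is normalised with its leading one at bit 63, and the
 * result's LSB sits at bit VFP_DOUBLE_LOW_BITS + 1; everything below
 * that is guard/sticky information which only steers rounding and, if
 * non-zero, makes the result inexact (FPSCR_IXC).
 *
 * The rounding increment encodes round-to-nearest-even compactly:
 * 'incr' is half a ULP, reduced by one when the result would be even,
 * so a value exactly half-way between two representable numbers does
 * not generate a carry and therefore rounds to the even neighbour.
 */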
u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr,
                              u32 exceptions, const char *func)
{
        u64 significand, incr;
        int exponent, shift, underflow;
        u32 rmode;

        vfp_double_dump("pack: in", vd);

        /*
         * Infinities and NaNs are a special case.
         */
        if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
                goto pack;

        /*
         * Special-case zero.
         */
        if (vd->significand == 0) {
                vd->exponent = 0;
                goto pack;
        }

        exponent = vd->exponent;
        significand = vd->significand;

        shift = 32 - fls(significand >> 32);
        if (shift == 32)
                shift = 64 - fls(significand);
        if (shift) {
                exponent -= shift;
                significand <<= shift;
        }

#ifdef DEBUG
        vd->exponent = exponent;
        vd->significand = significand;
        vfp_double_dump("pack: normalised", vd);
#endif

        /*
         * Tiny number?
         */
        underflow = exponent < 0;
        if (underflow) {
                significand = vfp_shiftright64jamming(significand, -exponent);
                exponent = 0;
#ifdef DEBUG
                vd->exponent = exponent;
                vd->significand = significand;
                vfp_double_dump("pack: tiny number", vd);
#endif
                if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
                        underflow = 0;
        }

        /*
         * Select rounding increment.
         */
        incr = 0;
        rmode = fpscr & FPSCR_RMODE_MASK;

        if (rmode == FPSCR_ROUND_NEAREST) {
                incr = 1ULL << VFP_DOUBLE_LOW_BITS;
                if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
                        incr -= 1;
        } else if (rmode == FPSCR_ROUND_TOZERO) {
                incr = 0;
        } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
                incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;

        pr_debug("VFP: rounding increment = 0x%08llx\n", incr);

        /*
         * Is our rounding going to overflow?
         */
        if ((significand + incr) < significand) {
                exponent += 1;
                significand = (significand >> 1) | (significand & 1);
                incr >>= 1;
#ifdef DEBUG
                vd->exponent = exponent;
                vd->significand = significand;
                vfp_double_dump("pack: overflow", vd);
#endif
        }

        /*
         * If any of the low bits (which will be shifted out of the
         * number) are non-zero, the result is inexact.
         */
        if (significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
                exceptions |= FPSCR_IXC;

        /*
         * Do our rounding.
         */
        significand += incr;

        /*
         * Infinity?
         */
        if (exponent >= 2046) {
                exceptions |= FPSCR_OFC | FPSCR_IXC;
                if (incr == 0) {
                        vd->exponent = 2045;
                        vd->significand = 0x7fffffffffffffffULL;
                } else {
                        vd->exponent = 2047;            /* infinity */
                        vd->significand = 0;
                }
        } else {
                if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
                        exponent = 0;
                if (exponent || significand > 0x8000000000000000ULL)
                        underflow = 0;
                if (underflow)
                        exceptions |= FPSCR_UFC;
                vd->exponent = exponent;
                vd->significand = significand >> 1;
        }

 pack:
        vfp_double_dump("pack: final", vd);
        {
                s64 d = vfp_double_pack(vd);
                pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
                         dd, d, exceptions);
                vfp_put_double(dd, d);
        }
        return exceptions & ~VFP_NAN_FLAG;
}
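/*
 * Note on NaN encodings as used here: a NaN with the top mantissa bit
 * (the VFP_DOUBLE_SIGNIFICAND_QNAN bit) set is quiet, one with it
 * clear is signalling.  Propagation below "quietens" the chosen NaN
 * simply by OR-ing that bit in.
 */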
/*
 * Propagate the NaN, setting exceptions if it is signalling.
 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 */
static u32
vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
                  struct vfp_double *vdm, u32 fpscr)
{
        struct vfp_double *nan;
        int tn, tm = 0;

        tn = vfp_double_type(vdn);

        if (vdm)
                tm = vfp_double_type(vdm);

        if (fpscr & FPSCR_DEFAULT_NAN)
                /*
                 * Default NaN mode - always returns a quiet NaN
                 */
                nan = &vfp_double_default_qnan;
        else {
                /*
                 * Contemporary mode - select the first signalling
                 * NAN, or if neither are signalling, the first
                 * quiet NAN.
                 */
                if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
                        nan = vdn;
                else
                        nan = vdm;
                /*
                 * Make the NaN quiet.
                 */
                nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
        }

        *vdd = *nan;

        /*
         * If one was a signalling NAN, raise invalid operation.
         */
        return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}

/*
 * Extended operations
 */
static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr)
{
        vfp_put_double(dd, vfp_double_packed_abs(vfp_get_double(dm)));
        return 0;
}

static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr)
{
        vfp_put_double(dd, vfp_get_double(dm));
        return 0;
}

static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr)
{
        vfp_put_double(dd, vfp_double_packed_negate(vfp_get_double(dm)));
        return 0;
}

static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
{
        struct vfp_double vdm, vdd;
        int ret, tm;

        vfp_double_unpack(&vdm, vfp_get_double(dm));
        tm = vfp_double_type(&vdm);
        if (tm & (VFP_NAN|VFP_INFINITY)) {
                struct vfp_double *vdp = &vdd;

                if (tm & VFP_NAN)
                        ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
                else if (vdm.sign == 0) {
 sqrt_copy:
                        vdp = &vdm;
                        ret = 0;
                } else {
 sqrt_invalid:
                        vdp = &vfp_double_default_qnan;
                        ret = FPSCR_IOC;
                }
                vfp_put_double(dd, vfp_double_pack(vdp));
                return ret;
        }

        /*
         * sqrt(+/- 0) == +/- 0
         */
        if (tm & VFP_ZERO)
                goto sqrt_copy;

        /*
         * Normalise a denormalised number
         */
        if (tm & VFP_DENORMAL)
                vfp_double_normalise_denormal(&vdm);

        /*
         * sqrt(<0) = invalid
         */
        if (vdm.sign)
                goto sqrt_invalid;

        vfp_double_dump("sqrt", &vdm);

        /*
         * Estimate the square root.
         */
        vdd.sign = 0;
        vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
        vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent,
                                vdm.significand >> 32) << 31;

        vfp_double_dump("sqrt estimate1", &vdd);

        vdm.significand >>= 1 + (vdm.exponent & 1);
        vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0,
                                                       vdd.significand);

        vfp_double_dump("sqrt estimate2", &vdd);

        /*
         * And now adjust.
         */
        if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
                if (vdd.significand < 2) {
                        vdd.significand = ~0ULL;
                } else {
                        u64 termh, terml, remh, reml;
                        vdm.significand <<= 2;
                        mul64to128(&termh, &terml, vdd.significand,
                                   vdd.significand);
                        sub128(&remh, &reml, vdm.significand, 0, termh, terml);
                        while ((s64)remh < 0) {
                                vdd.significand -= 1;
                                shift64left(&termh, &terml, vdd.significand);
                                terml |= 1;
                                add128(&remh, &reml, remh, reml, termh, terml);
                        }
                        vdd.significand |= (remh | reml) != 0;
                }
        }
        vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);

        return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt");
}
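/*
 * The comparisons below operate on the packed 64-bit values directly.
 * IEEE doubles are a sign-magnitude format, so once NaNs have been
 * ruled out two non-negative values compare like signed integers; for
 * two negative values the ordering reverses, which the sign ^ compare
 * tests handle.  The only other wrinkle is that +0 and -0 differ in
 * representation but must compare equal, hence the explicit
 * vfp_double_packed_abs(d | m) == 0 test.
 */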
/*
 * Equal        := ZC
 * Less than    := N
 * Greater than := C
 * Unordered    := CV
 */
static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr)
{
        s64 d, m;
        u32 ret = 0;

        m = vfp_get_double(dm);
        if (vfp_double_packed_exponent(m) == 2047 &&
            vfp_double_packed_mantissa(m)) {
                ret |= FPSCR_C | FPSCR_V;
                if (signal_on_qnan ||
                    !(vfp_double_packed_mantissa(m) &
                      (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
                        /*
                         * Signalling NaN, or signalling on quiet NaN
                         */
                        ret |= FPSCR_IOC;
        }

        d = vfp_get_double(dd);
        if (vfp_double_packed_exponent(d) == 2047 &&
            vfp_double_packed_mantissa(d)) {
                ret |= FPSCR_C | FPSCR_V;
                if (signal_on_qnan ||
                    !(vfp_double_packed_mantissa(d) &
                      (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
                        /*
                         * Signalling NaN, or signalling on quiet NaN
                         */
                        ret |= FPSCR_IOC;
        }

        if (ret == 0) {
                if (d == m || vfp_double_packed_abs(d | m) == 0) {
                        /*
                         * equal
                         */
                        ret |= FPSCR_Z | FPSCR_C;
                } else if (vfp_double_packed_sign(d ^ m)) {
                        /*
                         * different signs
                         */
                        if (vfp_double_packed_sign(d))
                                /*
                                 * d is negative, so d < m
                                 */
                                ret |= FPSCR_N;
                        else
                                /*
                                 * d is positive, so d > m
                                 */
                                ret |= FPSCR_C;
                } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
                        /*
                         * d < m
                         */
                        ret |= FPSCR_N;
                } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
                        /*
                         * d > m
                         */
                        ret |= FPSCR_C;
                }
        }

        return ret;
}

static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr)
{
        return vfp_compare(dd, 0, dm, fpscr);
}

static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr)
{
        return vfp_compare(dd, 1, dm, fpscr);
}

static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr)
{
        return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr);
}

static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr)
{
        return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr);
}
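/*
 * Double-to-single conversion: the exponent is re-biased from 1023 to
 * 127, and vfp_hi64to32jamming() narrows the 64-bit significand to 32
 * bits, folding any discarded low bits into bit 0 as a sticky bit, so
 * the single-precision rounding in vfp_single_normaliseround() still
 * sees that the value was inexact.
 */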
static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr)
{
        struct vfp_double vdm;
        struct vfp_single vsd;
        int tm;
        u32 exceptions = 0;

        vfp_double_unpack(&vdm, vfp_get_double(dm));

        tm = vfp_double_type(&vdm);

        /*
         * If we have a signalling NaN, signal invalid operation.
         */
        if (tm == VFP_SNAN)
                exceptions = FPSCR_IOC;

        if (tm & VFP_DENORMAL)
                vfp_double_normalise_denormal(&vdm);

        vsd.sign = vdm.sign;
        vsd.significand = vfp_hi64to32jamming(vdm.significand);

        /*
         * If we have an infinity or a NaN, the exponent must be 255.
         */
        if (tm & (VFP_INFINITY|VFP_NAN)) {
                vsd.exponent = 255;
                if (tm & VFP_NAN)
                        vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
                goto pack_nan;
        } else if (tm & VFP_ZERO)
                vsd.exponent = 0;
        else
                vsd.exponent = vdm.exponent - (1023 - 127);

        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts");

 pack_nan:
        vfp_put_float(sd, vfp_single_pack(&vsd));
        return exceptions;
}

static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr)
{
        struct vfp_double vdm;
        u32 m = vfp_get_float(dm);

        vdm.sign = 0;
        vdm.exponent = 1023 + 63 - 1;
        vdm.significand = (u64)m;

        return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito");
}

static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr)
{
        struct vfp_double vdm;
        u32 m = vfp_get_float(dm);

        vdm.sign = (m & 0x80000000) >> 16;
        vdm.exponent = 1023 + 63 - 1;
        vdm.significand = vdm.sign ? -m : m;

        return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito");
}
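/*
 * The float-to-integer conversions below treat the significand as a
 * fixed-point quantity: shift = 1023 + 63 - exponent, 'd' collects the
 * integer bits and 'rem' the shifted-out fraction.  The rounding
 * increment mirrors vfp_double_normaliseround(): round-to-nearest adds
 * half relative to 'rem' (0x8000000000000000), minus one when 'd' is
 * even so that exact halves round to even, while rounding away from
 * zero uses an all-ones increment so any non-zero fraction carries.
 */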
static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr)
{
        struct vfp_double vdm;
        u32 d, exceptions = 0;
        int rmode = fpscr & FPSCR_RMODE_MASK;
        int tm;

        vfp_double_unpack(&vdm, vfp_get_double(dm));

        /*
         * Do we have a denormalised number?
         */
        tm = vfp_double_type(&vdm);
        if (tm & VFP_DENORMAL)
                exceptions |= FPSCR_IDC;

        if (tm & VFP_NAN)
                vdm.sign = 0;

        if (vdm.exponent >= 1023 + 32) {
                d = vdm.sign ? 0 : 0xffffffff;
                exceptions = FPSCR_IOC;
        } else if (vdm.exponent >= 1023 - 1) {
                int shift = 1023 + 63 - vdm.exponent;
                u64 rem, incr = 0;

                /*
                 * 2^0 <= m < 2^32-2^8
                 */
                d = (vdm.significand << 1) >> shift;
                rem = vdm.significand << (65 - shift);

                if (rmode == FPSCR_ROUND_NEAREST) {
                        incr = 0x8000000000000000ULL;
                        if ((d & 1) == 0)
                                incr -= 1;
                } else if (rmode == FPSCR_ROUND_TOZERO) {
                        incr = 0;
                } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
                        incr = ~0ULL;
                }

                if ((rem + incr) < rem) {
                        if (d < 0xffffffff)
                                d += 1;
                        else
                                exceptions |= FPSCR_IOC;
                }

                if (d && vdm.sign) {
                        d = 0;
                        exceptions |= FPSCR_IOC;
                } else if (rem)
                        exceptions |= FPSCR_IXC;
        } else {
                d = 0;
                if (vdm.exponent | vdm.significand) {
                        exceptions |= FPSCR_IXC;
                        if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
                                d = 1;
                        else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
                                d = 0;
                                exceptions |= FPSCR_IOC;
                        }
                }
        }

        pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

        vfp_put_float(sd, d);

        return exceptions;
}

static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr)
{
        return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO);
}

static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr)
{
        struct vfp_double vdm;
        u32 d, exceptions = 0;
        int rmode = fpscr & FPSCR_RMODE_MASK;

        vfp_double_unpack(&vdm, vfp_get_double(dm));
        vfp_double_dump("VDM", &vdm);

        /*
         * Do we have a denormalised number?
         */
        if (vfp_double_type(&vdm) & VFP_DENORMAL)
                exceptions |= FPSCR_IDC;

        if (vdm.exponent >= 1023 + 32) {
                d = 0x7fffffff;
                if (vdm.sign)
                        d = ~d;
                exceptions |= FPSCR_IOC;
        } else if (vdm.exponent >= 1023 - 1) {
                int shift = 1023 + 63 - vdm.exponent;   /* 58 */
                u64 rem, incr = 0;

                d = (vdm.significand << 1) >> shift;
                rem = vdm.significand << (65 - shift);

                if (rmode == FPSCR_ROUND_NEAREST) {
                        incr = 0x8000000000000000ULL;
                        if ((d & 1) == 0)
                                incr -= 1;
                } else if (rmode == FPSCR_ROUND_TOZERO) {
                        incr = 0;
                } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
                        incr = ~0ULL;
                }

                if ((rem + incr) < rem && d < 0xffffffff)
                        d += 1;
                if (d > 0x7fffffff + (vdm.sign != 0)) {
                        d = 0x7fffffff + (vdm.sign != 0);
                        exceptions |= FPSCR_IOC;
                } else if (rem)
                        exceptions |= FPSCR_IXC;

                if (vdm.sign)
                        d = -d;
        } else {
                d = 0;
                if (vdm.exponent | vdm.significand) {
                        exceptions |= FPSCR_IXC;
                        if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
                                d = 1;
                        else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
                                d = -1;
                }
        }

        pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

        vfp_put_float(sd, (s32)d);

        return exceptions;
}

static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
{
        return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO);
}

static u32 (* const fop_extfns[32])(int dd, int unused, int dm, u32 fpscr) = {
        [FEXT_TO_IDX(FEXT_FCPY)]        = vfp_double_fcpy,
        [FEXT_TO_IDX(FEXT_FABS)]        = vfp_double_fabs,
        [FEXT_TO_IDX(FEXT_FNEG)]        = vfp_double_fneg,
        [FEXT_TO_IDX(FEXT_FSQRT)]       = vfp_double_fsqrt,
        [FEXT_TO_IDX(FEXT_FCMP)]        = vfp_double_fcmp,
        [FEXT_TO_IDX(FEXT_FCMPE)]       = vfp_double_fcmpe,
        [FEXT_TO_IDX(FEXT_FCMPZ)]       = vfp_double_fcmpz,
        [FEXT_TO_IDX(FEXT_FCMPEZ)]      = vfp_double_fcmpez,
        [FEXT_TO_IDX(FEXT_FCVT)]        = vfp_double_fcvts,
        [FEXT_TO_IDX(FEXT_FUITO)]       = vfp_double_fuito,
        [FEXT_TO_IDX(FEXT_FSITO)]       = vfp_double_fsito,
        [FEXT_TO_IDX(FEXT_FTOUI)]       = vfp_double_ftoui,
        [FEXT_TO_IDX(FEXT_FTOUIZ)]      = vfp_double_ftouiz,
        [FEXT_TO_IDX(FEXT_FTOSI)]       = vfp_double_ftosi,
        [FEXT_TO_IDX(FEXT_FTOSIZ)]      = vfp_double_ftosiz,
};
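/*
 * The extended-operation table above is indexed by the FEXT field of
 * the instruction (which arrives as 'dn' in vfp_double_cpdo()); the
 * entries left NULL are unhandled operations, which vfp_double_cpdo()
 * reports by returning ~0.
 */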
static u32
vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
                          struct vfp_double *vdm, u32 fpscr)
{
        struct vfp_double *vdp;
        u32 exceptions = 0;
        int tn, tm;

        tn = vfp_double_type(vdn);
        tm = vfp_double_type(vdm);

        if (tn & tm & VFP_INFINITY) {
                /*
                 * Two infinities.  Are they different signs?
                 */
                if (vdn->sign ^ vdm->sign) {
                        /*
                         * different signs -> invalid
                         */
                        exceptions = FPSCR_IOC;
                        vdp = &vfp_double_default_qnan;
                } else {
                        /*
                         * same signs -> valid
                         */
                        vdp = vdn;
                }
        } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
                /*
                 * One infinity and one number -> infinity
                 */
                vdp = vdn;
        } else {
                /*
                 * 'n' is a NaN of some type
                 */
                return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
        }
        *vdd = *vdp;
        return exceptions;
}

static u32
vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
               struct vfp_double *vdm, u32 fpscr)
{
        u32 exp_diff;
        u64 m_sig;

        if (vdn->significand & (1ULL << 63) ||
            vdm->significand & (1ULL << 63)) {
                pr_info("VFP: bad FP values in %s\n", __func__);
                vfp_double_dump("VDN", vdn);
                vfp_double_dump("VDM", vdm);
        }

        /*
         * Ensure that 'n' is the largest magnitude number.  Note that
         * if 'n' and 'm' have equal exponents, we do not swap them.
         * This ensures that NaN propagation works correctly.
         */
        if (vdn->exponent < vdm->exponent) {
                struct vfp_double *t = vdn;
                vdn = vdm;
                vdm = t;
        }

        /*
         * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
         * infinity or a NaN here.
         */
        if (vdn->exponent == 2047)
                return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);

        /*
         * We have two proper numbers, where 'vdn' is the larger magnitude.
         *
         * Copy 'n' to 'd' before doing the arithmetic.
         */
        *vdd = *vdn;

        /*
         * Align 'm' with the result.
         */
        exp_diff = vdn->exponent - vdm->exponent;
        m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);

        /*
         * If the signs are different, we are really subtracting.
         */
        if (vdn->sign ^ vdm->sign) {
                m_sig = vdn->significand - m_sig;
                if ((s64)m_sig < 0) {
                        vdd->sign = vfp_sign_negate(vdd->sign);
                        m_sig = -m_sig;
                } else if (m_sig == 0) {
                        vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
                                      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
                }
        } else {
                m_sig += vdn->significand;
        }
        vdd->significand = m_sig;

        return 0;
}
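/*
 * Note on the alignment above: vfp_shiftright64jamming() ORs any bits
 * it shifts out back into bit 0, e.g. 0x123 >> 4 gives 0x13 rather
 * than 0x12, so a discarded fraction still shows up as "sticky"
 * inexactness when the result is finally rounded.  The multiply below
 * relies on the same idea: vfp_hi64multiply64() keeps the high 64 bits
 * of the 128-bit product, folding any non-zero low half into bit 0.
 */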
static u32
vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
                    struct vfp_double *vdm, u32 fpscr)
{
        vfp_double_dump("VDN", vdn);
        vfp_double_dump("VDM", vdm);

        /*
         * Ensure that 'n' is the largest magnitude number.  Note that
         * if 'n' and 'm' have equal exponents, we do not swap them.
         * This ensures that NaN propagation works correctly.
         */
        if (vdn->exponent < vdm->exponent) {
                struct vfp_double *t = vdn;
                vdn = vdm;
                vdm = t;
                pr_debug("VFP: swapping M <-> N\n");
        }

        vdd->sign = vdn->sign ^ vdm->sign;

        /*
         * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
         */
        if (vdn->exponent == 2047) {
                if (vdn->significand ||
                    (vdm->exponent == 2047 && vdm->significand))
                        return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
                if ((vdm->exponent | vdm->significand) == 0) {
                        *vdd = vfp_double_default_qnan;
                        return FPSCR_IOC;
                }
                vdd->exponent = vdn->exponent;
                vdd->significand = 0;
                return 0;
        }

        /*
         * If 'm' is zero, the result is always zero.  In this case,
         * 'n' may be zero or a number, but it doesn't matter which.
         */
        if ((vdm->exponent | vdm->significand) == 0) {
                vdd->exponent = 0;
                vdd->significand = 0;
                return 0;
        }

        /*
         * We add 2 to the destination exponent for the same reason
         * as the addition case - though this time we have +1 from
         * each input operand.
         */
        vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
        vdd->significand = vfp_hi64multiply64(vdn->significand,
                                              vdm->significand);

        vfp_double_dump("VDD", vdd);
        return 0;
}

#define NEG_MULTIPLY    (1 << 0)
#define NEG_SUBTRACT    (1 << 1)

static u32
vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr,
                               u32 negate, char *func)
{
        struct vfp_double vdd, vdp, vdn, vdm;
        u32 exceptions;

        vfp_double_unpack(&vdn, vfp_get_double(dn));
        if (vdn.exponent == 0 && vdn.significand)
                vfp_double_normalise_denormal(&vdn);

        vfp_double_unpack(&vdm, vfp_get_double(dm));
        if (vdm.exponent == 0 && vdm.significand)
                vfp_double_normalise_denormal(&vdm);

        exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
        if (negate & NEG_MULTIPLY)
                vdp.sign = vfp_sign_negate(vdp.sign);

        vfp_double_unpack(&vdn, vfp_get_double(dd));
        if (negate & NEG_SUBTRACT)
                vdn.sign = vfp_sign_negate(vdn.sign);

        exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);

        return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func);
}
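/*
 * vfp_double_multiply_accumulate() implements all four fused variants
 * below: NEG_MULTIPLY negates the product (dn * dm) and NEG_SUBTRACT
 * negates the accumulator dd, so e.g. fnmsc, which passes both flags,
 * computes -dd - (dn * dm).
 */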
/*
 * Standard operations
 */

/*
 * dd = dd + (dn * dm)
 */
static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr)
{
        return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac");
}

/*
 * dd = dd - (dn * dm)
 */
static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr)
{
        return vfp_double_multiply_accumulate(dd, dn, dm, fpscr,
                                              NEG_MULTIPLY, "fnmac");
}

/*
 * dd = -dd + (dn * dm)
 */
static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr)
{
        return vfp_double_multiply_accumulate(dd, dn, dm, fpscr,
                                              NEG_SUBTRACT, "fmsc");
}

/*
 * dd = -dd - (dn * dm)
 */
static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr)
{
        return vfp_double_multiply_accumulate(dd, dn, dm, fpscr,
                                              NEG_SUBTRACT | NEG_MULTIPLY,
                                              "fnmsc");
}

/*
 * dd = dn * dm
 */
static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr)
{
        struct vfp_double vdd, vdn, vdm;
        u32 exceptions;

        vfp_double_unpack(&vdn, vfp_get_double(dn));
        if (vdn.exponent == 0 && vdn.significand)
                vfp_double_normalise_denormal(&vdn);

        vfp_double_unpack(&vdm, vfp_get_double(dm));
        if (vdm.exponent == 0 && vdm.significand)
                vfp_double_normalise_denormal(&vdm);

        exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
        return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul");
}

/*
 * dd = -(dn * dm)
 */
static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr)
{
        struct vfp_double vdd, vdn, vdm;
        u32 exceptions;

        vfp_double_unpack(&vdn, vfp_get_double(dn));
        if (vdn.exponent == 0 && vdn.significand)
                vfp_double_normalise_denormal(&vdn);

        vfp_double_unpack(&vdm, vfp_get_double(dm));
        if (vdm.exponent == 0 && vdm.significand)
                vfp_double_normalise_denormal(&vdm);

        exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
        vdd.sign = vfp_sign_negate(vdd.sign);

        return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul");
}

/*
 * dd = dn + dm
 */
static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr)
{
        struct vfp_double vdd, vdn, vdm;
        u32 exceptions;

        vfp_double_unpack(&vdn, vfp_get_double(dn));
        if (vdn.exponent == 0 && vdn.significand)
                vfp_double_normalise_denormal(&vdn);

        vfp_double_unpack(&vdm, vfp_get_double(dm));
        if (vdm.exponent == 0 && vdm.significand)
                vfp_double_normalise_denormal(&vdm);

        exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);

        return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd");
}

/*
 * dd = dn - dm
 */
static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr)
{
        struct vfp_double vdd, vdn, vdm;
        u32 exceptions;

        vfp_double_unpack(&vdn, vfp_get_double(dn));
        if (vdn.exponent == 0 && vdn.significand)
                vfp_double_normalise_denormal(&vdn);

        vfp_double_unpack(&vdm, vfp_get_double(dm));
        if (vdm.exponent == 0 && vdm.significand)
                vfp_double_normalise_denormal(&vdm);

        /*
         * Subtraction is like addition, but with a negated operand.
         */
        vdm.sign = vfp_sign_negate(vdm.sign);

        exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);

        return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub");
}

/*
 * dd = dn / dm
 */
static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
{
        struct vfp_double vdd, vdn, vdm;
        u32 exceptions = 0;
        int tm, tn;

        vfp_double_unpack(&vdn, vfp_get_double(dn));
        vfp_double_unpack(&vdm, vfp_get_double(dm));

        vdd.sign = vdn.sign ^ vdm.sign;

        tn = vfp_double_type(&vdn);
        tm = vfp_double_type(&vdm);

        /*
         * Is n a NAN?
         */
        if (tn & VFP_NAN)
                goto vdn_nan;

        /*
         * Is m a NAN?
         */
        if (tm & VFP_NAN)
                goto vdm_nan;

        /*
         * If n and m are infinity, the result is invalid
         * If n and m are zero, the result is invalid
         */
        if (tm & tn & (VFP_INFINITY|VFP_ZERO))
                goto invalid;

        /*
         * If n is infinity, the result is infinity
         */
        if (tn & VFP_INFINITY)
                goto infinity;

        /*
         * If m is zero, raise div0 exceptions
         */
        if (tm & VFP_ZERO)
                goto divzero;

        /*
         * If m is infinity, or n is zero, the result is zero
         */
        if (tm & VFP_INFINITY || tn & VFP_ZERO)
                goto zero;

        if (tn & VFP_DENORMAL)
                vfp_double_normalise_denormal(&vdn);
        if (tm & VFP_DENORMAL)
                vfp_double_normalise_denormal(&vdm);

        /*
         * Ok, we have two numbers, we can perform division.
         */
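        /*
         * Roughly: subtract the biased exponents (re-biasing by 1023,
         * with -1 to account for the scaling below), then pre-scale
         * the significands so the quotient estimate falls in a known
         * range.  When the low bits of the estimate from
         * vfp_estimate_div128to64() suggest it may be off by a couple
         * of ULPs, compute the remainder n - q * m exactly and step q
         * down until the remainder is non-negative; a non-zero
         * remainder is OR-ed into bit 0 as a sticky bit for rounding.
         */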
        vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
        vdm.significand <<= 1;
        if (vdm.significand <= (2 * vdn.significand)) {
                vdn.significand >>= 1;
                vdd.exponent++;
        }
        vdd.significand = vfp_estimate_div128to64(vdn.significand, 0,
                                                  vdm.significand);
        if ((vdd.significand & 0x1ff) <= 2) {
                u64 termh, terml, remh, reml;
                mul64to128(&termh, &terml, vdm.significand, vdd.significand);
                sub128(&remh, &reml, vdn.significand, 0, termh, terml);
                while ((s64)remh < 0) {
                        vdd.significand -= 1;
                        add128(&remh, &reml, remh, reml, 0, vdm.significand);
                }
                vdd.significand |= (reml != 0);
        }
        return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv");

 vdn_nan:
        exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
 pack:
        vfp_put_double(dd, vfp_double_pack(&vdd));
        return exceptions;

 vdm_nan:
        exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
        goto pack;

 zero:
        vdd.exponent = 0;
        vdd.significand = 0;
        goto pack;

 divzero:
        exceptions = FPSCR_DZC;
 infinity:
        vdd.exponent = 2047;
        vdd.significand = 0;
        goto pack;

 invalid:
        vfp_put_double(dd, vfp_double_pack(&vfp_double_default_qnan));
        return FPSCR_IOC;
}

static u32 (* const fop_fns[16])(int dd, int dn, int dm, u32 fpscr) = {
        [FOP_TO_IDX(FOP_FMAC)]  = vfp_double_fmac,
        [FOP_TO_IDX(FOP_FNMAC)] = vfp_double_fnmac,
        [FOP_TO_IDX(FOP_FMSC)]  = vfp_double_fmsc,
        [FOP_TO_IDX(FOP_FNMSC)] = vfp_double_fnmsc,
        [FOP_TO_IDX(FOP_FMUL)]  = vfp_double_fmul,
        [FOP_TO_IDX(FOP_FNMUL)] = vfp_double_fnmul,
        [FOP_TO_IDX(FOP_FADD)]  = vfp_double_fadd,
        [FOP_TO_IDX(FOP_FSUB)]  = vfp_double_fsub,
        [FOP_TO_IDX(FOP_FDIV)]  = vfp_double_fdiv,
};

#define FREG_BANK(x)    ((x) & 0x0c)
#define FREG_IDX(x)     ((x) & 3)
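/*
 * Main dispatcher for double-precision data-processing instructions.
 * veclen and vecstride are taken from the FPSCR LEN and STRIDE fields;
 * register numbers are handled in single-precision units (hence the
 * "d%u.%u" debug output printing dd >> 1, and the stride scaled by 2),
 * and the per-iteration stepping at the bottom of the loop wraps each
 * operand within its own register bank.
 */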
u32 vfp_double_cpdo(u32 inst, u32 fpscr)
{
        u32 op = inst & FOP_MASK;
        u32 exceptions = 0;
        unsigned int dd = vfp_get_sd(inst);
        unsigned int dn = vfp_get_sn(inst);
        unsigned int dm = vfp_get_sm(inst);
        unsigned int vecitr, veclen, vecstride;
        u32 (*fop)(int, int, s32, u32);

        veclen = fpscr & FPSCR_LENGTH_MASK;
        vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) ==
                          FPSCR_STRIDE_MASK)) * 2;

        /*
         * If destination bank is zero, vector length is always '1'.
         * ARM DDI0100F C5.1.3, C5.3.2.
         */
        if (FREG_BANK(dd) == 0)
                veclen = 0;

        pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
                 (veclen >> FPSCR_LENGTH_BIT) + 1);

        fop = (op == FOP_EXT) ? fop_extfns[dn] : fop_fns[FOP_TO_IDX(op)];
        if (!fop)
                goto invalid;

        for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
                u32 except;

                if (op == FOP_EXT)
                        pr_debug("VFP: itr%d (d%u.%u) = op[%u] (d%u.%u)\n",
                                 vecitr >> FPSCR_LENGTH_BIT,
                                 dd >> 1, dd & 1, dn,
                                 dm >> 1, dm & 1);
                else
                        pr_debug("VFP: itr%d (d%u.%u) = (d%u.%u) op[%u] (d%u.%u)\n",
                                 vecitr >> FPSCR_LENGTH_BIT,
                                 dd >> 1, dd & 1,
                                 dn >> 1, dn & 1,
                                 FOP_TO_IDX(op),
                                 dm >> 1, dm & 1);

                except = fop(dd, dn, dm, fpscr);
                pr_debug("VFP: itr%d: exceptions=%08x\n",
                         vecitr >> FPSCR_LENGTH_BIT, except);

                exceptions |= except;

                /*
                 * This ensures that comparisons only operate on scalars;
                 * comparisons always return with one FPSCR status bit set.
                 */
                if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
                        break;

                /*
                 * CHECK: It appears to be undefined whether we stop when
                 * we encounter an exception.  We continue.
                 */

                dd = FREG_BANK(dd) + ((FREG_IDX(dd) + vecstride) & 6);
                dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 6);
                if (FREG_BANK(dm) != 0)
                        dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 6);
        }
        return exceptions;

 invalid:
        return ~0;
}