/*
 *  linux/arch/arm/vfp/vfpsingle.c
 *
 * This code is derived in part from John R. Hauser's softfloat library, which
 * carries the following notice:
 *
 * ===========================================================================
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 * ===========================================================================
 */
#include <linux/kernel.h>
#include <linux/bitops.h>

#include <asm/div64.h>
#include <asm/ptrace.h>
#include <asm/vfp.h>

#include "vfpinstr.h"
#include "vfp.h"

static struct vfp_single vfp_single_default_qnan = {
        .exponent = 255,
        .sign = 0,
        .significand = VFP_SINGLE_SIGNIFICAND_QNAN,
};

static void vfp_single_dump(const char *str, struct vfp_single *s)
{
        pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
                 str, s->sign != 0, s->exponent, s->significand);
}

static void vfp_single_normalise_denormal(struct vfp_single *vs)
{
        int bits = 31 - fls(vs->significand);

        vfp_single_dump("normalise_denormal: in", vs);

        if (bits) {
                vs->exponent -= bits - 1;
                vs->significand <<= bits;
        }

        vfp_single_dump("normalise_denormal: out", vs);
}

#ifndef DEBUG
#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
#else
u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
#endif
{
        u32 significand, incr, rmode;
        int exponent, shift, underflow;

        vfp_single_dump("pack: in", vs);

        /*
         * Infinities and NaNs are a special case.
         */
        if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
                goto pack;

        /*
         * Special-case zero.
         */
        if (vs->significand == 0) {
                vs->exponent = 0;
                goto pack;
        }

        exponent = vs->exponent;
        significand = vs->significand;

        /*
         * Normalise first.  Note that we shift the significand up to
         * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
         * significant bit.
         */
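        /*
         * Editorial worked example (not from the original source): a
         * significand of 0x00400000 has fls() == 23, so shift == 9
         * below; "significand <<= 9" moves the leading 1 to bit 31
         * and the exponent is reduced by 9 to compensate, leaving the
         * value unchanged.
         */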
        shift = 32 - fls(significand);
        if (shift < 32 && shift) {
                exponent -= shift;
                significand <<= shift;
        }

#ifdef DEBUG
        vs->exponent = exponent;
        vs->significand = significand;
        vfp_single_dump("pack: normalised", vs);
#endif

        /*
         * Tiny number?
         */
        underflow = exponent < 0;
        if (underflow) {
                significand = vfp_shiftright32jamming(significand, -exponent);
                exponent = 0;
#ifdef DEBUG
                vs->exponent = exponent;
                vs->significand = significand;
                vfp_single_dump("pack: tiny number", vs);
#endif
                if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
                        underflow = 0;
        }

        /*
         * Select rounding increment.
         */
        incr = 0;
        rmode = fpscr & FPSCR_RMODE_MASK;

        if (rmode == FPSCR_ROUND_NEAREST) {
                incr = 1 << VFP_SINGLE_LOW_BITS;
                if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
                        incr -= 1;
        } else if (rmode == FPSCR_ROUND_TOZERO) {
                incr = 0;
        } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
                incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;

        pr_debug("VFP: rounding increment = 0x%08x\n", incr);
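
        /*
         * Editorial worked example (not from the original source):
         * with VFP_SINGLE_LOW_BITS == 7 the round-to-nearest increment
         * is 0x80.  For an exact tie (low eight bits == 0x80) with the
         * result LSB (bit 8) clear, incr is reduced to 0x7f and the
         * addition below does not carry, so ties round to even.
         */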

        /*
         * Is our rounding going to overflow?
         */
        if ((significand + incr) < significand) {
                exponent += 1;
                significand = (significand >> 1) | (significand & 1);
                incr >>= 1;
#ifdef DEBUG
                vs->exponent = exponent;
                vs->significand = significand;
                vfp_single_dump("pack: overflow", vs);
#endif
        }

        /*
         * If any of the low bits (which will be shifted out of the
         * number) are non-zero, the result is inexact.
         */
        if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
                exceptions |= FPSCR_IXC;

        /*
         * Do our rounding.
         */
        significand += incr;

        /*
         * Infinity?
         */
        if (exponent >= 254) {
                exceptions |= FPSCR_OFC | FPSCR_IXC;
                if (incr == 0) {
                        vs->exponent = 253;
                        vs->significand = 0x7fffffff;
                } else {
                        vs->exponent = 255;             /* infinity */
                        vs->significand = 0;
                }
        } else {
                if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
                        exponent = 0;
                if (exponent || significand > 0x80000000)
                        underflow = 0;
                if (underflow)
                        exceptions |= FPSCR_UFC;
                vs->exponent = exponent;
                vs->significand = significand >> 1;
        }

 pack:
        vfp_single_dump("pack: final", vs);
        {
                s32 d = vfp_single_pack(vs);
                pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
                         sd, d, exceptions);
                vfp_put_float(d, sd);
        }

        return exceptions;
}

/*
 * Propagate the NaN, setting exceptions if it is signalling.
 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 */
static u32
vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
                  struct vfp_single *vsm, u32 fpscr)
{
        struct vfp_single *nan;
        int tn, tm = 0;

        tn = vfp_single_type(vsn);

        if (vsm)
                tm = vfp_single_type(vsm);

        if (fpscr & FPSCR_DEFAULT_NAN)
                /*
                 * Default NaN mode - always returns a quiet NaN
                 */
                nan = &vfp_single_default_qnan;
        else {
                /*
                 * Contemporary mode - select the first signalling
                 * NAN, or if neither are signalling, the first
                 * quiet NAN.
                 */
                if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
                        nan = vsn;
                else
                        nan = vsm;
                /*
                 * Make the NaN quiet.
                 */
                nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
        }

        *vsd = *nan;

        /*
         * If one was a signalling NAN, raise invalid operation.
         */
        return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}


/*
 * Extended operations
 */
static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
{
        vfp_put_float(vfp_single_packed_abs(m), sd);
        return 0;
}

static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
{
        vfp_put_float(m, sd);
        return 0;
}

static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
{
        vfp_put_float(vfp_single_packed_negate(m), sd);
        return 0;
}

static const u16 sqrt_oddadjust[] = {
        0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
        0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
};

static const u16 sqrt_evenadjust[] = {
        0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
        0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
};

u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
{
        int index;
        u32 z, a;

        if ((significand & 0xc0000000) != 0x40000000) {
                printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
        }

        a = significand << 1;
        index = (a >> 27) & 15;
        if (exponent & 1) {
                z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
                z = ((a / z) << 14) + (z << 15);
                a >>= 1;
        } else {
                z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
                z = a / z + z;
                z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
                if (z <= a)
                        return (s32)a >> 1;
        }
        {
                u64 v = (u64)a << 31;
                do_div(v, z);
                return v + (z >> 1);
        }
}
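
/*
 * Editorial note (assumption, based on the SoftFloat heritage stated in
 * the header): this appears to mirror SoftFloat's estimateSqrt32() - a
 * 4-bit table lookup seeds the estimate, a Newton-Raphson style step
 * refines it in fixed point, and the final 64-bit division plus
 * "+ (z >> 1)" sharpens the result for the caller's adjustment loop.
 */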

static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
{
        struct vfp_single vsm, vsd;
        int ret, tm;

        vfp_single_unpack(&vsm, m);
        tm = vfp_single_type(&vsm);
        if (tm & (VFP_NAN|VFP_INFINITY)) {
                struct vfp_single *vsp = &vsd;

                if (tm & VFP_NAN)
                        ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
                else if (vsm.sign == 0) {
 sqrt_copy:
                        vsp = &vsm;
                        ret = 0;
                } else {
 sqrt_invalid:
                        vsp = &vfp_single_default_qnan;
                        ret = FPSCR_IOC;
                }
                vfp_put_float(vfp_single_pack(vsp), sd);
                return ret;
        }

        /*
         * sqrt(+/- 0) == +/- 0
         */
        if (tm & VFP_ZERO)
                goto sqrt_copy;

        /*
         * Normalise a denormalised number
         */
        if (tm & VFP_DENORMAL)
                vfp_single_normalise_denormal(&vsm);

        /*
         * sqrt(<0) = invalid
         */
        if (vsm.sign)
                goto sqrt_invalid;

        vfp_single_dump("sqrt", &vsm);

        /*
         * Estimate the square root.
         */
        vsd.sign = 0;
        vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
        vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;

        vfp_single_dump("sqrt estimate", &vsd);

        /*
         * And now adjust.
         */
        if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
                if (vsd.significand < 2) {
                        vsd.significand = 0xffffffff;
                } else {
                        u64 term;
                        s64 rem;
                        vsm.significand <<= !(vsm.exponent & 1);
                        term = (u64)vsd.significand * vsd.significand;
                        rem = ((u64)vsm.significand << 32) - term;

                        pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);

                        while (rem < 0) {
                                vsd.significand -= 1;
                                rem += ((u64)vsd.significand << 1) | 1;
                        }
                        vsd.significand |= rem != 0;
                }
        }
        vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);

        return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
}

/*
 * Equal        := ZC
 * Less than    := N
 * Greater than := C
 * Unordered    := CV
 */
static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
{
        s32 d;
        u32 ret = 0;

        d = vfp_get_float(sd);
        if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
                ret |= FPSCR_C | FPSCR_V;
                if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
                        /*
                         * Signalling NaN, or signalling on quiet NaN
                         */
                        ret |= FPSCR_IOC;
        }

        if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
                ret |= FPSCR_C | FPSCR_V;
                if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
                        /*
                         * Signalling NaN, or signalling on quiet NaN
                         */
                        ret |= FPSCR_IOC;
        }

        if (ret == 0) {
                if (d == m || vfp_single_packed_abs(d | m) == 0) {
                        /*
                         * equal
                         */
                        ret |= FPSCR_Z | FPSCR_C;
                } else if (vfp_single_packed_sign(d ^ m)) {
                        /*
                         * different signs
                         */
                        if (vfp_single_packed_sign(d))
                                /*
                                 * d is negative, so d < m
                                 */
                                ret |= FPSCR_N;
                        else
                                /*
                                 * d is positive, so d > m
                                 */
                                ret |= FPSCR_C;
                } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
                        /*
                         * d < m
                         */
                        ret |= FPSCR_N;
                } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
                        /*
                         * d > m
                         */
                        ret |= FPSCR_C;
                }
        }
        return ret;
}
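
/*
 * Editorial note: the "vfp_single_packed_abs(d | m) == 0" test above
 * makes +0 and -0 compare equal (Z,C set) even though their packed bit
 * patterns differ, as IEEE 754 requires.
 */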

static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
{
        return vfp_compare(sd, 0, m, fpscr);
}

static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
{
        return vfp_compare(sd, 1, m, fpscr);
}

static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
{
        return vfp_compare(sd, 0, 0, fpscr);
}

static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
{
        return vfp_compare(sd, 1, 0, fpscr);
}

static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
{
        struct vfp_single vsm;
        struct vfp_double vdd;
        int tm;
        u32 exceptions = 0;

        vfp_single_unpack(&vsm, m);

        tm = vfp_single_type(&vsm);

        /*
         * If we have a signalling NaN, signal invalid operation.
         */
        if (tm == VFP_SNAN)
                exceptions = FPSCR_IOC;

        if (tm & VFP_DENORMAL)
                vfp_single_normalise_denormal(&vsm);

        vdd.sign = vsm.sign;
        vdd.significand = (u64)vsm.significand << 32;

        /*
         * If we have an infinity or NaN, the exponent must be 2047.
         */
        if (tm & (VFP_INFINITY|VFP_NAN)) {
                vdd.exponent = 2047;
                if (tm == VFP_QNAN)
                        vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
                goto pack_nan;
        } else if (tm & VFP_ZERO)
                vdd.exponent = 0;
        else
                vdd.exponent = vsm.exponent + (1023 - 127);

        return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");

 pack_nan:
        vfp_put_double(vfp_double_pack(&vdd), dd);
        return exceptions;
}

static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
{
        struct vfp_single vs;

        vs.sign = 0;
        vs.exponent = 127 + 31 - 1;
        vs.significand = (u32)m;

        return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
}

static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
{
        struct vfp_single vs;

        vs.sign = (m & 0x80000000) >> 16;
        vs.exponent = 127 + 31 - 1;
        vs.significand = vs.sign ? -m : m;

        return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
}
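
/*
 * Editorial worked example (not from the original source): fsito(1)
 * starts with exponent 127 + 31 - 1 == 157 and significand 1; the
 * normalise step shifts the leading 1 up by 31 places (exponent 126),
 * and packing yields 0x3f800000, i.e. 1.0f.
 */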

static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
{
        struct vfp_single vsm;
        u32 d, exceptions = 0;
        int rmode = fpscr & FPSCR_RMODE_MASK;
        int tm;

        vfp_single_unpack(&vsm, m);
        vfp_single_dump("VSM", &vsm);

        /*
         * Do we have a denormalised number?
         */
        tm = vfp_single_type(&vsm);
        if (tm & VFP_DENORMAL)
                exceptions |= FPSCR_IDC;

        if (tm & VFP_NAN)
                vsm.sign = 0;

        if (vsm.exponent >= 127 + 32) {
                d = vsm.sign ? 0 : 0xffffffff;
                exceptions = FPSCR_IOC;
        } else if (vsm.exponent >= 127 - 1) {
                int shift = 127 + 31 - vsm.exponent;
                u32 rem, incr = 0;

                /*
                 * 2^0 <= m < 2^32-2^8
                 */
                d = (vsm.significand << 1) >> shift;
                rem = vsm.significand << (33 - shift);

                if (rmode == FPSCR_ROUND_NEAREST) {
                        incr = 0x80000000;
                        if ((d & 1) == 0)
                                incr -= 1;
                } else if (rmode == FPSCR_ROUND_TOZERO) {
                        incr = 0;
                } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
                        incr = ~0;
                }

                if ((rem + incr) < rem) {
                        if (d < 0xffffffff)
                                d += 1;
                        else
                                exceptions |= FPSCR_IOC;
                }

                if (d && vsm.sign) {
                        d = 0;
                        exceptions |= FPSCR_IOC;
                } else if (rem)
                        exceptions |= FPSCR_IXC;
        } else {
                d = 0;
                if (vsm.exponent | vsm.significand) {
                        exceptions |= FPSCR_IXC;
                        if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
                                d = 1;
                        else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
                                d = 0;
                                exceptions |= FPSCR_IOC;
                        }
                }
        }

        pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

        vfp_put_float(d, sd);

        return exceptions;
}

static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
{
        return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
}
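
/*
 * Editorial worked example (not from the original source): ftoui on
 * 2.5f (exponent 128, unpacked significand 0x50000000) uses shift 30,
 * giving d == 2 and rem == 0x80000000 - an exact tie.  Under
 * round-to-nearest, d is even so incr becomes 0x7fffffff, rem + incr
 * does not carry, and the result stays 2 with FPSCR_IXC raised.
 */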

static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
{
        struct vfp_single vsm;
        u32 d, exceptions = 0;
        int rmode = fpscr & FPSCR_RMODE_MASK;
        int tm;

        vfp_single_unpack(&vsm, m);
        vfp_single_dump("VSM", &vsm);

        /*
         * Do we have a denormalised number?
         */
        tm = vfp_single_type(&vsm);
        if (tm & VFP_DENORMAL)
                exceptions |= FPSCR_IDC;

        if (tm & VFP_NAN) {
                d = 0;
                exceptions |= FPSCR_IOC;
        } else if (vsm.exponent >= 127 + 32) {
                /*
                 * m >= 2^31-2^7: invalid
                 */
                d = 0x7fffffff;
                if (vsm.sign)
                        d = ~d;
                exceptions |= FPSCR_IOC;
        } else if (vsm.exponent >= 127 - 1) {
                int shift = 127 + 31 - vsm.exponent;
                u32 rem, incr = 0;

                /* 2^0 <= m <= 2^31-2^7 */
                d = (vsm.significand << 1) >> shift;
                rem = vsm.significand << (33 - shift);

                if (rmode == FPSCR_ROUND_NEAREST) {
                        incr = 0x80000000;
                        if ((d & 1) == 0)
                                incr -= 1;
                } else if (rmode == FPSCR_ROUND_TOZERO) {
                        incr = 0;
                } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
                        incr = ~0;
                }

                if ((rem + incr) < rem && d < 0xffffffff)
                        d += 1;
                if (d > 0x7fffffff + (vsm.sign != 0)) {
                        d = 0x7fffffff + (vsm.sign != 0);
                        exceptions |= FPSCR_IOC;
                } else if (rem)
                        exceptions |= FPSCR_IXC;

                if (vsm.sign)
                        d = -d;
        } else {
                d = 0;
                if (vsm.exponent | vsm.significand) {
                        exceptions |= FPSCR_IXC;
                        if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
                                d = 1;
                        else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
                                d = -1;
                }
        }

        pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

        vfp_put_float((s32)d, sd);

        return exceptions;
}

static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
{
        return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
}

static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = {
        [FEXT_TO_IDX(FEXT_FCPY)]        = vfp_single_fcpy,
        [FEXT_TO_IDX(FEXT_FABS)]        = vfp_single_fabs,
        [FEXT_TO_IDX(FEXT_FNEG)]        = vfp_single_fneg,
        [FEXT_TO_IDX(FEXT_FSQRT)]       = vfp_single_fsqrt,
        [FEXT_TO_IDX(FEXT_FCMP)]        = vfp_single_fcmp,
        [FEXT_TO_IDX(FEXT_FCMPE)]       = vfp_single_fcmpe,
        [FEXT_TO_IDX(FEXT_FCMPZ)]       = vfp_single_fcmpz,
        [FEXT_TO_IDX(FEXT_FCMPEZ)]      = vfp_single_fcmpez,
        [FEXT_TO_IDX(FEXT_FCVT)]        = vfp_single_fcvtd,
        [FEXT_TO_IDX(FEXT_FUITO)]       = vfp_single_fuito,
        [FEXT_TO_IDX(FEXT_FSITO)]       = vfp_single_fsito,
        [FEXT_TO_IDX(FEXT_FTOUI)]       = vfp_single_ftoui,
        [FEXT_TO_IDX(FEXT_FTOUIZ)]      = vfp_single_ftouiz,
        [FEXT_TO_IDX(FEXT_FTOSI)]       = vfp_single_ftosi,
        [FEXT_TO_IDX(FEXT_FTOSIZ)]      = vfp_single_ftosiz,
};
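
/*
 * Editorial note: slots not listed above are NULL; vfp_single_cpdo()
 * below checks for that and reports an undefined instruction by
 * returning (u32)-1.  FEXT_FCVT is the odd one out - it writes a
 * double-precision register via vfp_single_fcvtd().
 */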

static u32
vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
                          struct vfp_single *vsm, u32 fpscr)
{
        struct vfp_single *vsp;
        u32 exceptions = 0;
        int tn, tm;

        tn = vfp_single_type(vsn);
        tm = vfp_single_type(vsm);

        if (tn & tm & VFP_INFINITY) {
                /*
                 * Two infinities.  Are they different signs?
                 */
                if (vsn->sign ^ vsm->sign) {
                        /*
                         * different signs -> invalid
                         */
                        exceptions = FPSCR_IOC;
                        vsp = &vfp_single_default_qnan;
                } else {
                        /*
                         * same signs -> valid
                         */
                        vsp = vsn;
                }
        } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
                /*
                 * One infinity and one number -> infinity
                 */
                vsp = vsn;
        } else {
                /*
                 * 'n' is a NaN of some type
                 */
                return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
        }
        *vsd = *vsp;
        return exceptions;
}

static u32
vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
               struct vfp_single *vsm, u32 fpscr)
{
        u32 exp_diff, m_sig;

        if (vsn->significand & 0x80000000 ||
            vsm->significand & 0x80000000) {
                pr_info("VFP: bad FP values in %s\n", __func__);
                vfp_single_dump("VSN", vsn);
                vfp_single_dump("VSM", vsm);
        }

        /*
         * Ensure that 'n' is the largest magnitude number.  Note that
         * if 'n' and 'm' have equal exponents, we do not swap them.
         * This ensures that NaN propagation works correctly.
         */
        if (vsn->exponent < vsm->exponent) {
                struct vfp_single *t = vsn;
                vsn = vsm;
                vsm = t;
        }

        /*
         * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
         * infinity or a NaN here.
         */
        if (vsn->exponent == 255)
                return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);

        /*
         * We have two proper numbers, where 'vsn' is the larger magnitude.
         *
         * Copy 'n' to 'd' before doing the arithmetic.
         */
        *vsd = *vsn;

        /*
         * Align both numbers.
         */
        exp_diff = vsn->exponent - vsm->exponent;
        m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);

        /*
         * If the signs are different, we are really subtracting.
         */
        if (vsn->sign ^ vsm->sign) {
                m_sig = vsn->significand - m_sig;
                if ((s32)m_sig < 0) {
                        vsd->sign = vfp_sign_negate(vsd->sign);
                        m_sig = -m_sig;
                } else if (m_sig == 0) {
                        vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
                                      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
                }
        } else {
                m_sig = vsn->significand + m_sig;
        }
        vsd->significand = m_sig;

        return 0;
}

static u32
vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
{
        vfp_single_dump("VSN", vsn);
        vfp_single_dump("VSM", vsm);

        /*
         * Ensure that 'n' is the largest magnitude number.  Note that
         * if 'n' and 'm' have equal exponents, we do not swap them.
         * This ensures that NaN propagation works correctly.
         */
        if (vsn->exponent < vsm->exponent) {
                struct vfp_single *t = vsn;
                vsn = vsm;
                vsm = t;
                pr_debug("VFP: swapping M <-> N\n");
        }

        vsd->sign = vsn->sign ^ vsm->sign;

        /*
         * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
         */
        if (vsn->exponent == 255) {
                if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
                        return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
                if ((vsm->exponent | vsm->significand) == 0) {
                        *vsd = vfp_single_default_qnan;
                        return FPSCR_IOC;
                }
                vsd->exponent = vsn->exponent;
                vsd->significand = 0;
                return 0;
        }

        /*
         * If 'm' is zero, the result is always zero.  In this case,
         * 'n' may be zero or a number, but it doesn't matter which.
         */
        if ((vsm->exponent | vsm->significand) == 0) {
                vsd->exponent = 0;
                vsd->significand = 0;
                return 0;
        }

        /*
         * We add 2 to the destination exponent for the same reason as
         * the addition case - though this time we have +1 from each
         * input operand.
         */
        vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
        vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);

        vfp_single_dump("VSD", vsd);
        return 0;
}
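
/*
 * Editorial worked example (not from the original source): for
 * 1.0f * 1.0f both unpacked significands are 0x40000000, so the
 * 64-bit product is 0x1000000000000000 and its jammed high half is
 * 0x10000000.  With exponent 127 + 127 - 127 + 2 == 129, the
 * normalise/round step shifts up by 3 (exponent 126), which packs
 * back to 0x3f800000 (1.0f).
 */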

#define NEG_MULTIPLY    (1 << 0)
#define NEG_SUBTRACT    (1 << 1)

static u32
vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
{
        struct vfp_single vsd, vsp, vsn, vsm;
        u32 exceptions;
        s32 v;

        v = vfp_get_float(sn);
        pr_debug("VFP: s%u = %08x\n", sn, v);
        vfp_single_unpack(&vsn, v);
        if (vsn.exponent == 0 && vsn.significand)
                vfp_single_normalise_denormal(&vsn);

        vfp_single_unpack(&vsm, m);
        if (vsm.exponent == 0 && vsm.significand)
                vfp_single_normalise_denormal(&vsm);

        exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
        if (negate & NEG_MULTIPLY)
                vsp.sign = vfp_sign_negate(vsp.sign);

        v = vfp_get_float(sd);
        pr_debug("VFP: s%u = %08x\n", sd, v);
        vfp_single_unpack(&vsn, v);
        if (negate & NEG_SUBTRACT)
                vsn.sign = vfp_sign_negate(vsn.sign);

        exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);

        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
}
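
/*
 * Editorial summary of the negate flags as used by the wrappers below:
 *
 *   fmac:   0                             sd = sd + (sn * sm)
 *   fnmac:  NEG_MULTIPLY                  sd = sd - (sn * sm)
 *   fmsc:   NEG_SUBTRACT                  sd = -sd + (sn * sm)
 *   fnmsc:  NEG_SUBTRACT | NEG_MULTIPLY   sd = -sd - (sn * sm)
 */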

/*
 * Standard operations
 */

/*
 * sd = sd + (sn * sm)
 */
static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
{
        return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
}

/*
 * sd = sd - (sn * sm)
 */
static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
{
        return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
}

/*
 * sd = -sd + (sn * sm)
 */
static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
{
        return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
}

/*
 * sd = -sd - (sn * sm)
 */
static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
{
        return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
}

/*
 * sd = sn * sm
 */
static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
{
        struct vfp_single vsd, vsn, vsm;
        u32 exceptions;
        s32 n = vfp_get_float(sn);

        pr_debug("VFP: s%u = %08x\n", sn, n);

        vfp_single_unpack(&vsn, n);
        if (vsn.exponent == 0 && vsn.significand)
                vfp_single_normalise_denormal(&vsn);

        vfp_single_unpack(&vsm, m);
        if (vsm.exponent == 0 && vsm.significand)
                vfp_single_normalise_denormal(&vsm);

        exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
}

/*
 * sd = -(sn * sm)
 */
static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
{
        struct vfp_single vsd, vsn, vsm;
        u32 exceptions;
        s32 n = vfp_get_float(sn);

        pr_debug("VFP: s%u = %08x\n", sn, n);

        vfp_single_unpack(&vsn, n);
        if (vsn.exponent == 0 && vsn.significand)
                vfp_single_normalise_denormal(&vsn);

        vfp_single_unpack(&vsm, m);
        if (vsm.exponent == 0 && vsm.significand)
                vfp_single_normalise_denormal(&vsm);

        exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
        vsd.sign = vfp_sign_negate(vsd.sign);
        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
}

/*
 * sd = sn + sm
 */
static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
{
        struct vfp_single vsd, vsn, vsm;
        u32 exceptions;
        s32 n = vfp_get_float(sn);

        pr_debug("VFP: s%u = %08x\n", sn, n);

        /*
         * Unpack and normalise denormals.
         */
        vfp_single_unpack(&vsn, n);
        if (vsn.exponent == 0 && vsn.significand)
                vfp_single_normalise_denormal(&vsn);

        vfp_single_unpack(&vsm, m);
        if (vsm.exponent == 0 && vsm.significand)
                vfp_single_normalise_denormal(&vsm);

        exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);

        return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
}

/*
 * sd = sn - sm
 */
static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
{
        /*
         * Subtraction is addition with one sign inverted.
         */
        return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
}

/*
 * sd = sn / sm
 */
static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
{
        struct vfp_single vsd, vsn, vsm;
        u32 exceptions = 0;
        s32 n = vfp_get_float(sn);
        int tm, tn;

        pr_debug("VFP: s%u = %08x\n", sn, n);

        vfp_single_unpack(&vsn, n);
        vfp_single_unpack(&vsm, m);

        vsd.sign = vsn.sign ^ vsm.sign;

        tn = vfp_single_type(&vsn);
        tm = vfp_single_type(&vsm);

        /*
         * Is n a NAN?
         */
        if (tn & VFP_NAN)
                goto vsn_nan;

        /*
         * Is m a NAN?
         */
        if (tm & VFP_NAN)
                goto vsm_nan;

        /*
         * If n and m are infinity, the result is invalid
         * If n and m are zero, the result is invalid
         */
        if (tm & tn & (VFP_INFINITY|VFP_ZERO))
                goto invalid;

        /*
         * If n is infinity, the result is infinity
         */
        if (tn & VFP_INFINITY)
                goto infinity;

        /*
         * If m is zero, raise div0 exception
         */
        if (tm & VFP_ZERO)
                goto divzero;

        /*
         * If m is infinity, or n is zero, the result is zero
         */
        if (tm & VFP_INFINITY || tn & VFP_ZERO)
                goto zero;

        if (tn & VFP_DENORMAL)
                vfp_single_normalise_denormal(&vsn);
        if (tm & VFP_DENORMAL)
                vfp_single_normalise_denormal(&vsm);

        /*
         * Ok, we have two numbers, we can perform division.
         */
        vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
        vsm.significand <<= 1;
        if (vsm.significand <= (2 * vsn.significand)) {
                vsn.significand >>= 1;
                vsd.exponent++;
        }
        {
                u64 significand = (u64)vsn.significand << 32;
                do_div(significand, vsm.significand);
                vsd.significand = significand;
        }
        if ((vsd.significand & 0x3f) == 0)
                vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);

        return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");

 vsn_nan:
        exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
 pack:
        vfp_put_float(vfp_single_pack(&vsd), sd);
        return exceptions;

 vsm_nan:
        exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
        goto pack;

 zero:
        vsd.exponent = 0;
        vsd.significand = 0;
        goto pack;

 divzero:
        exceptions = FPSCR_DZC;
 infinity:
        vsd.exponent = 255;
        vsd.significand = 0;
        goto pack;

 invalid:
        vfp_put_float(vfp_single_pack(&vfp_single_default_qnan), sd);
        return FPSCR_IOC;
}
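
/*
 * Editorial worked example (not from the original source): 1.0f / 2.0f
 * has unpacked significands 0x40000000 / 0x40000000 and exponents
 * 127 / 128, so vsd.exponent starts at 125.  The alignment step fires
 * (0x80000000 <= 0x80000000), halving 'n' and bumping the exponent to
 * 126; the 64-bit division then yields 0x40000000 exactly, which
 * packs to 0x3f000000 (0.5f).
 */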

static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = {
        [FOP_TO_IDX(FOP_FMAC)]  = vfp_single_fmac,
        [FOP_TO_IDX(FOP_FNMAC)] = vfp_single_fnmac,
        [FOP_TO_IDX(FOP_FMSC)]  = vfp_single_fmsc,
        [FOP_TO_IDX(FOP_FNMSC)] = vfp_single_fnmsc,
        [FOP_TO_IDX(FOP_FMUL)]  = vfp_single_fmul,
        [FOP_TO_IDX(FOP_FNMUL)] = vfp_single_fnmul,
        [FOP_TO_IDX(FOP_FADD)]  = vfp_single_fadd,
        [FOP_TO_IDX(FOP_FSUB)]  = vfp_single_fsub,
        [FOP_TO_IDX(FOP_FDIV)]  = vfp_single_fdiv,
};

#define FREG_BANK(x)    ((x) & 0x18)
#define FREG_IDX(x)     ((x) & 7)

u32 vfp_single_cpdo(u32 inst, u32 fpscr)
{
        u32 op = inst & FOP_MASK;
        u32 exceptions = 0;
        unsigned int dest;
        unsigned int sn = vfp_get_sn(inst);
        unsigned int sm = vfp_get_sm(inst);
        unsigned int vecitr, veclen, vecstride;
        u32 (*fop)(int, int, s32, u32);

        veclen = fpscr & FPSCR_LENGTH_MASK;
        vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);

        /*
         * fcvtds takes a dN register number as destination, not sN.
         * Technically, if bit 0 of dd is set, this is an invalid
         * instruction.  However, we ignore this for efficiency.
         * It also only operates on scalars.
         */
        if ((inst & FEXT_MASK) == FEXT_FCVT) {
                veclen = 0;
                dest = vfp_get_dd(inst);
        } else
                dest = vfp_get_sd(inst);

        /*
         * If destination bank is zero, vector length is always '1'.
         * ARM DDI0100F C5.1.3, C5.3.2.
         */
        if (FREG_BANK(dest) == 0)
                veclen = 0;

        pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
                 (veclen >> FPSCR_LENGTH_BIT) + 1);

        fop = (op == FOP_EXT) ?
                fop_extfns[FEXT_TO_IDX(inst)] : fop_fns[FOP_TO_IDX(op)];
        if (!fop)
                goto invalid;

        for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
                s32 m = vfp_get_float(sm);
                u32 except;

                if (op == FOP_EXT && (inst & FEXT_MASK) == FEXT_FCVT)
                        pr_debug("VFP: itr%d (d%u) = op[%u] (s%u=%08x)\n",
                                 vecitr >> FPSCR_LENGTH_BIT, dest, sn, sm, m);
                else if (op == FOP_EXT)
                        pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n",
                                 vecitr >> FPSCR_LENGTH_BIT, dest, sn, sm, m);
                else
                        pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n",
                                 vecitr >> FPSCR_LENGTH_BIT, dest, sn,
                                 FOP_TO_IDX(op), sm, m);

                except = fop(dest, sn, m, fpscr);
                pr_debug("VFP: itr%d: exceptions=%08x\n",
                         vecitr >> FPSCR_LENGTH_BIT, except);

                exceptions |= except;

                /*
                 * This ensures that comparisons only operate on scalars;
                 * comparisons always return with one FPSCR status bit set.
                 */
                if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
                        break;

                /*
                 * CHECK: It appears to be undefined whether we stop when
                 * we encounter an exception.  We continue.
                 */

                dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
                sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
                if (FREG_BANK(sm) != 0)
                        sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
        }
        return exceptions;

 invalid:
        return (u32)-1;
}