/*
 *  linux/arch/arm/vfp/vfpsingle.c
 *
 * This code is derived in part from John R. Hauser's SoftFloat library, which
 * carries the following notice:
 *
 * ===========================================================================
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 * ===========================================================================
 */
#include <linux/kernel.h>
#include <linux/bitops.h>

#include <asm/div64.h>
#include <asm/ptrace.h>
#include <asm/vfp.h>

#include "vfpinstr.h"
#include "vfp.h"

static struct vfp_single vfp_single_default_qnan = {
	.exponent	= 255,
	.sign		= 0,
	.significand	= VFP_SINGLE_SIGNIFICAND_QNAN,
};

static void vfp_single_dump(const char *str, struct vfp_single *s)
{
	pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
		 str, s->sign != 0, s->exponent, s->significand);
}

static void vfp_single_normalise_denormal(struct vfp_single *vs)
{
	int bits = 31 - fls(vs->significand);

	vfp_single_dump("normalise_denormal: in", vs);

	if (bits) {
		vs->exponent -= bits - 1;
		vs->significand <<= bits;
	}

	vfp_single_dump("normalise_denormal: out", vs);
}

#ifndef DEBUG
#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
#else
u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
#endif
{
	u32 significand, incr, rmode;
	int exponent, shift, underflow;

	vfp_single_dump("pack: in", vs);

	/*
	 * Infinities and NaNs are a special case.
	 */
	if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
		goto pack;

	/*
	 * Special-case zero.
	 */
	if (vs->significand == 0) {
		vs->exponent = 0;
		goto pack;
	}

	exponent = vs->exponent;
	significand = vs->significand;

	/*
	 * Normalise first.  Note that we shift the significand up to
	 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
	 * significant bit.
	 */
	shift = 32 - fls(significand);
	if (shift < 32 && shift) {
		exponent -= shift;
		significand <<= shift;
	}

#ifdef DEBUG
	vs->exponent = exponent;
	vs->significand = significand;
	vfp_single_dump("pack: normalised", vs);
#endif

	/*
	 * Tiny number?
	 */
	underflow = exponent < 0;
	if (underflow) {
		significand = vfp_shiftright32jamming(significand, -exponent);
		exponent = 0;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: tiny number", vs);
#endif
		if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
			underflow = 0;
	}
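
	/*
	 * Explanatory note (not part of the original SoftFloat-derived
	 * code): 'incr' is the value added to the significand to round
	 * it.  For round-to-nearest it is half an ULP of the result,
	 * reduced by one when the result's least significant bit is
	 * already 0 so that exact ties round to the even neighbour.
	 * For the directed modes, rounding away from zero adds one
	 * less than a full ULP, which carries into the result exactly
	 * when any of the discarded low bits are set.
	 */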

	/*
	 * Select rounding increment.
	 */
	incr = 0;
	rmode = fpscr & FPSCR_RMODE_MASK;

	if (rmode == FPSCR_ROUND_NEAREST) {
		incr = 1 << VFP_SINGLE_LOW_BITS;
		if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
			incr -= 1;
	} else if (rmode == FPSCR_ROUND_TOZERO) {
		incr = 0;
	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
		incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;

	pr_debug("VFP: rounding increment = 0x%08x\n", incr);

	/*
	 * Is our rounding going to overflow?
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: overflow", vs);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Infinity?
	 */
	if (exponent >= 254) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			vs->exponent = 253;
			vs->significand = 0x7fffffff;
		} else {
			vs->exponent = 255;		/* infinity */
			vs->significand = 0;
		}
	} else {
		if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
			exponent = 0;
		if (exponent || significand > 0x80000000)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vs->exponent = exponent;
		vs->significand = significand >> 1;
	}

 pack:
	vfp_single_dump("pack: final", vs);
	{
		s32 d = vfp_single_pack(vs);
		pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
			 sd, d, exceptions);
		vfp_put_float(sd, d);
	}

	return exceptions;
}

/*
 * Propagate the NaN, setting exceptions if it is signalling.
 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 */
static u32
vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
		  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *nan;
	int tn, tm = 0;

	tn = vfp_single_type(vsn);

	if (vsm)
		tm = vfp_single_type(vsm);

	if (fpscr & FPSCR_DEFAULT_NAN)
		/*
		 * Default NaN mode - always returns a quiet NaN
		 */
		nan = &vfp_single_default_qnan;
	else {
		/*
		 * Contemporary mode - select the first signalling
		 * NAN, or if neither are signalling, the first
		 * quiet NAN.
		 */
		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
			nan = vsn;
		else
			nan = vsm;
		/*
		 * Make the NaN quiet.
		 */
		nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
	}

	*vsd = *nan;

	/*
	 * If one was a signalling NAN, raise invalid operation.
	 */
	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}


/*
 * Extended operations
 */
static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(sd, vfp_single_packed_abs(m));
	return 0;
}

static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(sd, m);
	return 0;
}

static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(sd, vfp_single_packed_negate(m));
	return 0;
}

static const u16 sqrt_oddadjust[] = {
	0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
	0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
};

static const u16 sqrt_evenadjust[] = {
	0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
	0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
};
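
/*
 * Explanatory note: the function below appears to follow the scheme of
 * SoftFloat's estimateSqrt32() - a 16-entry table, indexed by the top
 * fraction bits and selected by the parity of the exponent, seeds an
 * initial guess, which Newton-Raphson style refinement steps then
 * improve to roughly 32 significant bits.  The estimate may still be
 * a few ULPs off; the caller adjusts it exactly.
 */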
u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
{
	int index;
	u32 z, a;

	if ((significand & 0xc0000000) != 0x40000000) {
		printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
	}

	a = significand << 1;
	index = (a >> 27) & 15;
	if (exponent & 1) {
		z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
		z = ((a / z) << 14) + (z << 15);
		a >>= 1;
	} else {
		z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
		z = a / z + z;
		z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
		if (z <= a)
			return (s32)a >> 1;
	}
	{
		u64 v = (u64)a << 31;
		do_div(v, z);
		return v + (z >> 1);
	}
}

static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm, vsd;
	int ret, tm;

	vfp_single_unpack(&vsm, m);
	tm = vfp_single_type(&vsm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_single *vsp = &vsd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
		else if (vsm.sign == 0) {
 sqrt_copy:
			vsp = &vsm;
			ret = 0;
		} else {
 sqrt_invalid:
			vsp = &vfp_single_default_qnan;
			ret = FPSCR_IOC;
		}
		vfp_put_float(sd, vfp_single_pack(vsp));
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number
	 */
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vsm.sign)
		goto sqrt_invalid;

	vfp_single_dump("sqrt", &vsm);

	/*
	 * Estimate the square root.
	 */
	vsd.sign = 0;
	vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
	vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;

	vfp_single_dump("sqrt estimate", &vsd);

	/*
	 * And now adjust.
	 */
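	/*
	 * Explanatory note: when the estimate lands near a rounding
	 * boundary (low bits <= 5), verify it exactly.  Squaring the
	 * candidate and subtracting from the (suitably scaled) input
	 * gives a remainder; the candidate is decremented until the
	 * remainder is non-negative, and any non-zero remainder is
	 * folded into the sticky bit so the final rounding is correct.
	 */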
	if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
		if (vsd.significand < 2) {
			vsd.significand = 0xffffffff;
		} else {
			u64 term;
			s64 rem;
			vsm.significand <<= !(vsm.exponent & 1);
			term = (u64)vsd.significand * vsd.significand;
			rem = ((u64)vsm.significand << 32) - term;

			pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);

			while (rem < 0) {
				vsd.significand -= 1;
				rem += ((u64)vsd.significand << 1) | 1;
			}
			vsd.significand |= rem != 0;
		}
	}
	vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
}

/*
 * Equal	:= ZC
 * Less than	:= N
 * Greater than	:= C
 * Unordered	:= CV
 */
static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
{
	s32 d;
	u32 ret = 0;

	d = vfp_get_float(sd);
	if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (ret == 0) {
		if (d == m || vfp_single_packed_abs(d | m) == 0) {
			/*
			 * equal
			 */
			ret |= FPSCR_Z | FPSCR_C;
		} else if (vfp_single_packed_sign(d ^ m)) {
			/*
			 * different signs
			 */
			if (vfp_single_packed_sign(d))
				/*
				 * d is negative, so d < m
				 */
				ret |= FPSCR_N;
			else
				/*
				 * d is positive, so d > m
				 */
				ret |= FPSCR_C;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
			/*
			 * d < m
			 */
			ret |= FPSCR_N;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
			/*
			 * d > m
			 */
			ret |= FPSCR_C;
		}
	}
	return ret;
}

static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 0, m, fpscr);
}

static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 1, m, fpscr);
}

static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 0, 0, fpscr);
}

static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 1, 0, fpscr);
}

static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	struct vfp_double vdd;
	int tm;
	u32 exceptions = 0;

	vfp_single_unpack(&vsm, m);

	tm = vfp_single_type(&vsm);

	/*
	 * If we have a signalling NaN, signal invalid operation.
	 */
	if (tm == VFP_SNAN)
		exceptions = FPSCR_IOC;

	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	vdd.sign = vsm.sign;
	vdd.significand = (u64)vsm.significand << 32;

	/*
	 * If we have an infinity or NaN, the exponent must be 2047.
	 */
	if (tm & (VFP_INFINITY|VFP_NAN)) {
		vdd.exponent = 2047;
		if (tm & VFP_NAN)
			vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
		goto pack_nan;
	} else if (tm & VFP_ZERO)
		vdd.exponent = 0;
	else
		vdd.exponent = vsm.exponent + (1023 - 127);

	/*
	 * Technically, if bit 0 of dd is set, this is an invalid
	 * instruction.  However, we ignore this for efficiency.
	 */
	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");

 pack_nan:
	vfp_put_double(dd, vfp_double_pack(&vdd));
	return exceptions;
}
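
/*
 * Explanatory note on the two integer-to-float conversions below:
 * the 32-bit integer (made positive first, for fsito) is dropped
 * straight into the significand with a fixed biased exponent of
 * 127 + 31 - 1, and vfp_single_normaliseround() is left to shift
 * it into place, round it and pack it.  The sign is kept as the
 * 0x8000 flag used throughout this file's unpacked format.
 */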
static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vs;

	vs.sign = 0;
	vs.exponent = 127 + 31 - 1;
	vs.significand = (u32)m;

	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
}

static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vs;

	vs.sign = (m & 0x80000000) >> 16;
	vs.exponent = 127 + 31 - 1;
	vs.significand = vs.sign ? -m : m;

	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
}
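
/*
 * Explanatory note on the float-to-integer conversions below: the
 * significand is shifted so that the integer part lands in 'd' and
 * the discarded fraction bits land in 'rem'.  The rounding increment
 * is then applied to 'rem': round-to-nearest uses 0x80000000 (half),
 * less one when 'd' is already even so that ties round to even, and
 * the away-from-zero directed mode uses ~0.  A carry out of the
 * 32-bit addition, detected by '(rem + incr) < rem', means the
 * integer result must be bumped by one.
 */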
static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_single_type(&vsm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN)
		vsm.sign = 0;

	if (vsm.exponent >= 127 + 32) {
		d = vsm.sign ? 0 : 0xffffffff;
		exceptions = FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/*
		 * 2^0 <= m < 2^32-2^8
		 */
		d = (vsm.significand << 1) >> shift;
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x80000000;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			incr = ~0;
		}

		if ((rem + incr) < rem) {
			if (d < 0xffffffff)
				d += 1;
			else
				exceptions |= FPSCR_IOC;
		}

		if (d && vsm.sign) {
			d = 0;
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;
	} else {
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
				d = 0;
				exceptions |= FPSCR_IOC;
			}
		}
	}

	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(sd, d);

	return exceptions;
}

static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
}

static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_single_type(&vsm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN) {
		d = 0;
		exceptions |= FPSCR_IOC;
	} else if (vsm.exponent >= 127 + 32) {
		/*
		 * m >= 2^31-2^7: invalid
		 */
		d = 0x7fffffff;
		if (vsm.sign)
			d = ~d;
		exceptions |= FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/* 2^0 <= m <= 2^31-2^7 */
		d = (vsm.significand << 1) >> shift;
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x80000000;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			incr = ~0;
		}

		if ((rem + incr) < rem && d < 0xffffffff)
			d += 1;
		if (d > 0x7fffffff + (vsm.sign != 0)) {
			d = 0x7fffffff + (vsm.sign != 0);
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;

		if (vsm.sign)
			d = -d;
	} else {
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
				d = -1;
		}
	}

	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(sd, (s32)d);

	return exceptions;
}

static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
}

static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= vfp_single_fcpy,
	[FEXT_TO_IDX(FEXT_FABS)]	= vfp_single_fabs,
	[FEXT_TO_IDX(FEXT_FNEG)]	= vfp_single_fneg,
	[FEXT_TO_IDX(FEXT_FSQRT)]	= vfp_single_fsqrt,
	[FEXT_TO_IDX(FEXT_FCMP)]	= vfp_single_fcmp,
	[FEXT_TO_IDX(FEXT_FCMPE)]	= vfp_single_fcmpe,
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= vfp_single_fcmpz,
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= vfp_single_fcmpez,
	[FEXT_TO_IDX(FEXT_FCVT)]	= vfp_single_fcvtd,
	[FEXT_TO_IDX(FEXT_FUITO)]	= vfp_single_fuito,
	[FEXT_TO_IDX(FEXT_FSITO)]	= vfp_single_fsito,
	[FEXT_TO_IDX(FEXT_FTOUI)]	= vfp_single_ftoui,
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= vfp_single_ftouiz,
	[FEXT_TO_IDX(FEXT_FTOSI)]	= vfp_single_ftosi,
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= vfp_single_ftosiz,
};
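

/*
 * Explanatory note: extended-op slots not named above are left NULL
 * by the designated initialisers; vfp_single_cpdo() below checks for
 * a NULL entry and reports the instruction as invalid rather than
 * calling through it.
 */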
static u32
vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
			  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *vsp;
	u32 exceptions = 0;
	int tn, tm;

	tn = vfp_single_type(vsn);
	tm = vfp_single_type(vsm);

	if (tn & tm & VFP_INFINITY) {
		/*
		 * Two infinities.  Are they different signs?
		 */
		if (vsn->sign ^ vsm->sign) {
			/*
			 * different signs -> invalid
			 */
			exceptions = FPSCR_IOC;
			vsp = &vfp_single_default_qnan;
		} else {
			/*
			 * same signs -> valid
			 */
			vsp = vsn;
		}
	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
		/*
		 * One infinity and one number -> infinity
		 */
		vsp = vsn;
	} else {
		/*
		 * 'n' is a NaN of some type
		 */
		return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
	}
	*vsd = *vsp;
	return exceptions;
}

static u32
vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
	       struct vfp_single *vsm, u32 fpscr)
{
	u32 exp_diff, m_sig;

	if (vsn->significand & 0x80000000 ||
	    vsm->significand & 0x80000000) {
		pr_info("VFP: bad FP values in %s\n", __func__);
		vfp_single_dump("VSN", vsn);
		vfp_single_dump("VSM", vsm);
	}

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
	}

	/*
	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
	 * infinity or a NaN here.
	 */
	if (vsn->exponent == 255)
		return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);

	/*
	 * We have two proper numbers, where 'vsn' is the larger magnitude.
	 *
	 * Copy 'n' to 'd' before doing the arithmetic.
	 */
	*vsd = *vsn;

	/*
	 * Align both numbers.
	 */
	exp_diff = vsn->exponent - vsm->exponent;
	m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);

	/*
	 * If the signs are different, we are really subtracting.
	 */
	if (vsn->sign ^ vsm->sign) {
		m_sig = vsn->significand - m_sig;
		if ((s32)m_sig < 0) {
			vsd->sign = vfp_sign_negate(vsd->sign);
			m_sig = -m_sig;
		} else if (m_sig == 0) {
			vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
		}
	} else {
		m_sig = vsn->significand + m_sig;
	}
	vsd->significand = m_sig;

	return 0;
}
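
/*
 * Explanatory note: both significands are normalised with their
 * leading bit at bit 30, so the 64-bit product of two values in
 * [2^30, 2^31) lies in [2^60, 2^62).  vfp_hi64to32jamming() keeps
 * the high 32 bits and ORs any non-zero low bits into bit 0 as a
 * sticky bit, so the later rounding step still sees an inexact
 * result; the '+ 2' on the exponent compensates for the scaling.
 */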
static u32
vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
{
	vfp_single_dump("VSN", vsn);
	vfp_single_dump("VSM", vsm);

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
		pr_debug("VFP: swapping M <-> N\n");
	}

	vsd->sign = vsn->sign ^ vsm->sign;

	/*
	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
	 */
	if (vsn->exponent == 255) {
		if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
			return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
		if ((vsm->exponent | vsm->significand) == 0) {
			*vsd = vfp_single_default_qnan;
			return FPSCR_IOC;
		}
		vsd->exponent = vsn->exponent;
		vsd->significand = 0;
		return 0;
	}

	/*
	 * If 'm' is zero, the result is always zero.  In this case,
	 * 'n' may be zero or a number, but it doesn't matter which.
	 */
	if ((vsm->exponent | vsm->significand) == 0) {
		vsd->exponent = 0;
		vsd->significand = 0;
		return 0;
	}

	/*
	 * We add 2 to the destination exponent for the same reason as
	 * the addition case - though this time we have +1 from each
	 * input operand.
	 */
	vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
	vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);

	vfp_single_dump("VSD", vsd);
	return 0;
}

#define NEG_MULTIPLY	(1 << 0)
#define NEG_SUBTRACT	(1 << 1)
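
/*
 * Explanatory note: the four multiply-accumulate variants below are
 * all built from this one helper.  NEG_MULTIPLY negates the product
 * (sn * sm) and NEG_SUBTRACT negates the accumulator sd, giving:
 * fmac = sd + sn*sm, fnmac = sd - sn*sm, fmsc = -sd + sn*sm and
 * fnmsc = -sd - sn*sm.
 */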
static u32
vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
{
	struct vfp_single vsd, vsp, vsn, vsm;
	u32 exceptions;
	s32 v;

	v = vfp_get_float(sn);
	pr_debug("VFP: s%u = %08x\n", sn, v);
	vfp_single_unpack(&vsn, v);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
	if (negate & NEG_MULTIPLY)
		vsp.sign = vfp_sign_negate(vsp.sign);

	v = vfp_get_float(sd);
	pr_debug("VFP: s%u = %08x\n", sd, v);
	vfp_single_unpack(&vsn, v);
	if (negate & NEG_SUBTRACT)
		vsn.sign = vfp_sign_negate(vsn.sign);

	exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
}

/*
 * Standard operations
 */

/*
 * sd = sd + (sn * sm)
 */
static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
}

/*
 * sd = sd - (sn * sm)
 */
static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
}

/*
 * sd = -sd + (sn * sm)
 */
static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
}

/*
 * sd = -sd - (sn * sm)
 */
static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
}

/*
 * sd = sn * sm
 */
static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
}

/*
 * sd = -(sn * sm)
 */
static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
	vsd.sign = vfp_sign_negate(vsd.sign);
	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
}

/*
 * sd = sn + sm
 */
static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	/*
	 * Unpack and normalise denormals.
	 */
	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
}

/*
 * sd = sn - sm
 */
static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
{
	/*
	 * Subtraction is addition with one sign inverted.
	 */
	return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
}

/*
 * sd = sn / sm
 */
static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions = 0;
	s32 n = vfp_get_float(sn);
	int tm, tn;

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	vfp_single_unpack(&vsm, m);

	vsd.sign = vsn.sign ^ vsm.sign;

	tn = vfp_single_type(&vsn);
	tm = vfp_single_type(&vsm);

	/*
	 * Is n a NAN?
	 */
	if (tn & VFP_NAN)
		goto vsn_nan;

	/*
	 * Is m a NAN?
	 */
	if (tm & VFP_NAN)
		goto vsm_nan;

	/*
	 * If n and m are infinity, the result is invalid
	 * If n and m are zero, the result is invalid
	 */
	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
		goto invalid;

	/*
	 * If n is infinity, the result is infinity
	 */
	if (tn & VFP_INFINITY)
		goto infinity;

	/*
	 * If m is zero, raise div0 exception
	 */
	if (tm & VFP_ZERO)
		goto divzero;

	/*
	 * If m is infinity, or n is zero, the result is zero
	 */
	if (tm & VFP_INFINITY || tn & VFP_ZERO)
		goto zero;

	if (tn & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsn);
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * Ok, we have two numbers, we can perform division.
	 */
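	/*
	 * Explanatory note: the significands are pre-scaled so that
	 * their quotient lies in [1, 2), then a 64-by-32 bit divide
	 * produces a 32-bit quotient.  If the low quotient bits look
	 * exact, the quotient is multiplied back and compared with
	 * the dividend; any difference sets the sticky bit so that
	 * the final rounding remains correct.
	 */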
	vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
	vsm.significand <<= 1;
	if (vsm.significand <= (2 * vsn.significand)) {
		vsn.significand >>= 1;
		vsd.exponent++;
	}
	{
		u64 significand = (u64)vsn.significand << 32;
		do_div(significand, vsm.significand);
		vsd.significand = significand;
	}
	if ((vsd.significand & 0x3f) == 0)
		vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");

 vsn_nan:
	exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
 pack:
	vfp_put_float(sd, vfp_single_pack(&vsd));
	return exceptions;

 vsm_nan:
	exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
	goto pack;

 zero:
	vsd.exponent = 0;
	vsd.significand = 0;
	goto pack;

 divzero:
	exceptions = FPSCR_DZC;
 infinity:
	vsd.exponent = 255;
	vsd.significand = 0;
	goto pack;

 invalid:
	vfp_put_float(sd, vfp_single_pack(&vfp_single_default_qnan));
	return FPSCR_IOC;
}

static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = {
	[FOP_TO_IDX(FOP_FMAC)]	= vfp_single_fmac,
	[FOP_TO_IDX(FOP_FNMAC)]	= vfp_single_fnmac,
	[FOP_TO_IDX(FOP_FMSC)]	= vfp_single_fmsc,
	[FOP_TO_IDX(FOP_FNMSC)]	= vfp_single_fnmsc,
	[FOP_TO_IDX(FOP_FMUL)]	= vfp_single_fmul,
	[FOP_TO_IDX(FOP_FNMUL)]	= vfp_single_fnmul,
	[FOP_TO_IDX(FOP_FADD)]	= vfp_single_fadd,
	[FOP_TO_IDX(FOP_FSUB)]	= vfp_single_fsub,
	[FOP_TO_IDX(FOP_FDIV)]	= vfp_single_fdiv,
};

#define FREG_BANK(x)	((x) & 0x18)
#define FREG_IDX(x)	((x) & 7)

u32 vfp_single_cpdo(u32 inst, u32 fpscr)
{
	u32 op = inst & FOP_MASK;
	u32 exceptions = 0;
	unsigned int sd = vfp_get_sd(inst);
	unsigned int sn = vfp_get_sn(inst);
	unsigned int sm = vfp_get_sm(inst);
	unsigned int vecitr, veclen, vecstride;
	u32 (*fop)(int, int, s32, u32);

	veclen = fpscr & FPSCR_LENGTH_MASK;
	vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);

	/*
	 * If destination bank is zero, vector length is always '1'.
	 * ARM DDI0100F C5.1.3, C5.3.2.
	 */
	if (FREG_BANK(sd) == 0)
		veclen = 0;

	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
		 (veclen >> FPSCR_LENGTH_BIT) + 1);

	fop = (op == FOP_EXT) ? fop_extfns[FEXT_TO_IDX(inst)] : fop_fns[FOP_TO_IDX(op)];
	if (!fop)
		goto invalid;

	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
		s32 m = vfp_get_float(sm);
		u32 except;

		if (op == FOP_EXT)
			pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, sd, sn, sm, m);
		else
			pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, sd, sn,
				 FOP_TO_IDX(op), sm, m);

		except = fop(sd, sn, m, fpscr);
		pr_debug("VFP: itr%d: exceptions=%08x\n",
			 vecitr >> FPSCR_LENGTH_BIT, except);

		exceptions |= except;

		/*
		 * This ensures that comparisons only operate on scalars;
		 * comparisons always return with one FPSCR status bit set.
		 */
		if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
			break;

		/*
		 * CHECK: It appears to be undefined whether we stop when
		 * we encounter an exception.  We continue.
		 */
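
		/*
		 * Explanatory note: advance to the next register in the
		 * vector.  The stride is added to the index within the
		 * current 8-register bank, wrapping modulo 8, so a
		 * vector never crosses its bank.  An sm operand in
		 * bank 0 is a scalar and is not stepped.
		 */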
		sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7);
		sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
		if (FREG_BANK(sm) != 0)
			sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
	}
	return exceptions;

 invalid:
	return (u32)-1;
}