1 /*- 2 * Copyright (C) 1996 Wolfgang Solfrank. 3 * Copyright (C) 1996 TooLs GmbH. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by TooLs GmbH. 17 * 4. The name of TooLs GmbH may not be used to endorse or promote products 18 * derived from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 * 31 * $NetBSD: fpu.c,v 1.5 2001/07/22 11:29:46 wiz Exp $ 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/proc.h> 39 #include <sys/systm.h> 40 #include <sys/limits.h> 41 42 #include <machine/altivec.h> 43 #include <machine/fpu.h> 44 #include <machine/ieeefp.h> 45 #include <machine/pcb.h> 46 #include <machine/psl.h> 47 48 #include <powerpc/fpu/fpu_arith.h> 49 #include <powerpc/fpu/fpu_emu.h> 50 #include <powerpc/fpu/fpu_extern.h> 51 52 void spe_handle_fpdata(struct trapframe *); 53 void spe_handle_fpround(struct trapframe *); 54 static int spe_emu_instr(uint32_t, struct fpemu *, struct fpn **, uint32_t *); 55 56 static void 57 save_vec_int(struct thread *td) 58 { 59 int msr; 60 struct pcb *pcb; 61 62 pcb = td->td_pcb; 63 64 /* 65 * Temporarily re-enable the vector unit during the save 66 */ 67 msr = mfmsr(); 68 mtmsr(msr | PSL_VEC); 69 isync(); 70 71 /* 72 * Save the vector registers and SPEFSCR to the PCB 73 */ 74 #define EVSTDW(n) __asm ("evstdw %1,0(%0)" \ 75 :: "b"(pcb->pcb_vec.vr[n]), "n"(n)); 76 EVSTDW(0); EVSTDW(1); EVSTDW(2); EVSTDW(3); 77 EVSTDW(4); EVSTDW(5); EVSTDW(6); EVSTDW(7); 78 EVSTDW(8); EVSTDW(9); EVSTDW(10); EVSTDW(11); 79 EVSTDW(12); EVSTDW(13); EVSTDW(14); EVSTDW(15); 80 EVSTDW(16); EVSTDW(17); EVSTDW(18); EVSTDW(19); 81 EVSTDW(20); EVSTDW(21); EVSTDW(22); EVSTDW(23); 82 EVSTDW(24); EVSTDW(25); EVSTDW(26); EVSTDW(27); 83 EVSTDW(28); EVSTDW(29); EVSTDW(30); EVSTDW(31); 84 #undef EVSTDW 85 86 __asm ( "evxor 0,0,0\n" 87 "evaddumiaaw 0,0\n" 88 "evstdd 0,0(%0)" :: "b"(&pcb->pcb_vec.spare[0])); 89 pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR); 90 91 /* 92 * Disable vector unit again 93 */ 94 isync(); 95 mtmsr(msr); 96 97 } 98 99 void 100 enable_vec(struct thread *td) 101 { 102 int msr; 103 struct pcb *pcb; 104 struct trapframe *tf; 105 106 pcb = td->td_pcb; 107 tf = trapframe(td); 108 109 /* 110 * Save the thread's SPE CPU number, and set the CPU's current 111 * vector thread 112 */ 113 td->td_pcb->pcb_veccpu = PCPU_GET(cpuid); 114 PCPU_SET(vecthread, td); 115 116 /* 117 * Enable the vector unit for when the thread returns from the 118 * exception. If this is the first time the unit has been used by 119 * the thread, initialise the vector registers and VSCR to 0, and 120 * set the flag to indicate that the vector unit is in use. 121 */ 122 tf->srr1 |= PSL_VEC; 123 if (!(pcb->pcb_flags & PCB_VEC)) { 124 memset(&pcb->pcb_vec, 0, sizeof pcb->pcb_vec); 125 pcb->pcb_flags |= PCB_VEC; 126 pcb->pcb_vec.vscr = mfspr(SPR_SPEFSCR); 127 } 128 129 /* 130 * Temporarily enable the vector unit so the registers 131 * can be restored. 132 */ 133 msr = mfmsr(); 134 mtmsr(msr | PSL_VEC); 135 136 /* Restore SPEFSCR and ACC. Use %r0 as the scratch for ACC. */ 137 mtspr(SPR_SPEFSCR, pcb->pcb_vec.vscr); 138 __asm __volatile("evldd 0, 0(%0); evmra 0,0\n" 139 :: "b"(&pcb->pcb_vec.spare[0])); 140 141 /* 142 * The lower half of each register will be restored on trap return. Use 143 * %r0 as a scratch register, and restore it last. 144 */ 145 #define EVLDW(n) __asm __volatile("evldw 0, 0(%0); evmergehilo "#n",0,"#n \ 146 :: "b"(&pcb->pcb_vec.vr[n])); 147 EVLDW(1); EVLDW(2); EVLDW(3); EVLDW(4); 148 EVLDW(5); EVLDW(6); EVLDW(7); EVLDW(8); 149 EVLDW(9); EVLDW(10); EVLDW(11); EVLDW(12); 150 EVLDW(13); EVLDW(14); EVLDW(15); EVLDW(16); 151 EVLDW(17); EVLDW(18); EVLDW(19); EVLDW(20); 152 EVLDW(21); EVLDW(22); EVLDW(23); EVLDW(24); 153 EVLDW(25); EVLDW(26); EVLDW(27); EVLDW(28); 154 EVLDW(29); EVLDW(30); EVLDW(31); EVLDW(0); 155 #undef EVLDW 156 157 isync(); 158 mtmsr(msr); 159 } 160 161 void 162 save_vec(struct thread *td) 163 { 164 struct pcb *pcb; 165 166 pcb = td->td_pcb; 167 168 save_vec_int(td); 169 170 /* 171 * Clear the current vec thread and pcb's CPU id 172 * XXX should this be left clear to allow lazy save/restore ? 173 */ 174 pcb->pcb_veccpu = INT_MAX; 175 PCPU_SET(vecthread, NULL); 176 } 177 178 /* 179 * Save SPE state without dropping ownership. This will only save state if 180 * the current vector-thread is `td'. 181 */ 182 void 183 save_vec_nodrop(struct thread *td) 184 { 185 struct thread *vtd; 186 187 vtd = PCPU_GET(vecthread); 188 if (td != vtd) { 189 return; 190 } 191 192 save_vec_int(td); 193 } 194 195 196 #define SPE_INST_MASK 0x31f 197 #define EADD 0x200 198 #define ESUB 0x201 199 #define EABS 0x204 200 #define ENABS 0x205 201 #define ENEG 0x206 202 #define EMUL 0x208 203 #define EDIV 0x209 204 #define ECMPGT 0x20c 205 #define ECMPLT 0x20d 206 #define ECMPEQ 0x20e 207 #define ECFUI 0x210 208 #define ECFSI 0x211 209 #define ECTUI 0x214 210 #define ECTSI 0x215 211 #define ECTUF 0x216 212 #define ECTSF 0x217 213 #define ECTUIZ 0x218 214 #define ECTSIZ 0x21a 215 216 #define SPE 0x4 217 #define SPFP 0x6 218 #define DPFP 0x7 219 220 #define SPE_OPC 4 221 #define OPC_SHIFT 26 222 223 #define EVFSADD 0x280 224 #define EVFSSUB 0x281 225 #define EVFSABS 0x284 226 #define EVFSNABS 0x285 227 #define EVFSNEG 0x286 228 #define EVFSMUL 0x288 229 #define EVFSDIV 0x289 230 #define EVFSCMPGT 0x28c 231 #define EVFSCMPLT 0x28d 232 #define EVFSCMPEQ 0x28e 233 #define EVFSCFUI 0x290 234 #define EVFSCFSI 0x291 235 #define EVFSCTUI 0x294 236 #define EVFSCTSI 0x295 237 #define EVFSCTUF 0x296 238 #define EVFSCTSF 0x297 239 #define EVFSCTUIZ 0x298 240 #define EVFSCTSIZ 0x29a 241 242 #define EFSADD 0x2c0 243 #define EFSSUB 0x2c1 244 #define EFSABS 0x2c4 245 #define EFSNABS 0x2c5 246 #define EFSNEG 0x2c6 247 #define EFSMUL 0x2c8 248 #define EFSDIV 0x2c9 249 #define EFSCMPGT 0x2cc 250 #define EFSCMPLT 0x2cd 251 #define EFSCMPEQ 0x2ce 252 #define EFSCFD 0x2cf 253 #define EFSCFUI 0x2d0 254 #define EFSCFSI 0x2d1 255 #define EFSCTUI 0x2d4 256 #define EFSCTSI 0x2d5 257 #define EFSCTUF 0x2d6 258 #define EFSCTSF 0x2d7 259 #define EFSCTUIZ 0x2d8 260 #define EFSCTSIZ 0x2da 261 262 #define EFDADD 0x2e0 263 #define EFDSUB 0x2e1 264 #define EFDABS 0x2e4 265 #define EFDNABS 0x2e5 266 #define EFDNEG 0x2e6 267 #define EFDMUL 0x2e8 268 #define EFDDIV 0x2e9 269 #define EFDCMPGT 0x2ec 270 #define EFDCMPLT 0x2ed 271 #define EFDCMPEQ 0x2ee 272 #define EFDCFS 0x2ef 273 #define EFDCFUI 0x2f0 274 #define EFDCFSI 0x2f1 275 #define EFDCTUI 0x2f4 276 #define EFDCTSI 0x2f5 277 #define EFDCTUF 0x2f6 278 #define EFDCTSF 0x2f7 279 #define EFDCTUIZ 0x2f8 280 #define EFDCTSIZ 0x2fa 281 282 enum { 283 NONE, 284 SINGLE, 285 DOUBLE, 286 VECTOR, 287 }; 288 289 static uint32_t fpscr_to_spefscr(uint32_t fpscr) 290 { 291 uint32_t spefscr; 292 293 spefscr = 0; 294 295 if (fpscr & FPSCR_VX) 296 spefscr |= SPEFSCR_FINV; 297 if (fpscr & FPSCR_OX) 298 spefscr |= SPEFSCR_FOVF; 299 if (fpscr & FPSCR_UX) 300 spefscr |= SPEFSCR_FUNF; 301 if (fpscr & FPSCR_ZX) 302 spefscr |= SPEFSCR_FDBZ; 303 if (fpscr & FPSCR_XX) 304 spefscr |= SPEFSCR_FX; 305 306 return (spefscr); 307 } 308 309 /* Sign is 0 for unsigned, 1 for signed. */ 310 static int 311 spe_to_int(struct fpemu *fpemu, struct fpn *fpn, uint32_t *val, int sign) 312 { 313 uint32_t res[2]; 314 315 res[0] = fpu_ftox(fpemu, fpn, res); 316 if (res[0] != UINT_MAX && res[0] != 0) 317 fpemu->fe_cx |= FPSCR_OX; 318 else if (sign == 0 && res[0] != 0) 319 fpemu->fe_cx |= FPSCR_UX; 320 else 321 *val = res[1]; 322 323 return (0); 324 } 325 326 /* Masked instruction */ 327 /* 328 * For compare instructions, returns 1 if success, 0 if not. For all others, 329 * returns -1, or -2 if no result needs recorded. 330 */ 331 static int 332 spe_emu_instr(uint32_t instr, struct fpemu *fpemu, 333 struct fpn **result, uint32_t *iresult) 334 { 335 switch (instr & SPE_INST_MASK) { 336 case EABS: 337 case ENABS: 338 case ENEG: 339 /* Taken care of elsewhere. */ 340 break; 341 case ECTUIZ: 342 fpemu->fe_cx &= ~FPSCR_RN; 343 fpemu->fe_cx |= FP_RZ; 344 case ECTUI: 345 spe_to_int(fpemu, &fpemu->fe_f2, iresult, 0); 346 return (-1); 347 case ECTSIZ: 348 fpemu->fe_cx &= ~FPSCR_RN; 349 fpemu->fe_cx |= FP_RZ; 350 case ECTSI: 351 spe_to_int(fpemu, &fpemu->fe_f2, iresult, 1); 352 return (-1); 353 case EADD: 354 *result = fpu_add(fpemu); 355 break; 356 case ESUB: 357 *result = fpu_sub(fpemu); 358 break; 359 case EMUL: 360 *result = fpu_mul(fpemu); 361 break; 362 case EDIV: 363 *result = fpu_div(fpemu); 364 break; 365 case ECMPGT: 366 fpu_compare(fpemu, 0); 367 if (fpemu->fe_cx & FPSCR_FG) 368 return (1); 369 return (0); 370 case ECMPLT: 371 fpu_compare(fpemu, 0); 372 if (fpemu->fe_cx & FPSCR_FL) 373 return (1); 374 return (0); 375 case ECMPEQ: 376 fpu_compare(fpemu, 0); 377 if (fpemu->fe_cx & FPSCR_FE) 378 return (1); 379 return (0); 380 default: 381 printf("Unknown instruction %x\n", instr); 382 } 383 384 return (-1); 385 } 386 387 static int 388 spe_explode(struct fpemu *fe, struct fpn *fp, uint32_t type, 389 uint32_t hi, uint32_t lo) 390 { 391 uint32_t s; 392 393 fp->fp_sign = hi >> 31; 394 fp->fp_sticky = 0; 395 switch (type) { 396 case SINGLE: 397 s = fpu_stof(fp, hi); 398 break; 399 400 case DOUBLE: 401 s = fpu_dtof(fp, hi, lo); 402 break; 403 } 404 405 if (s == FPC_QNAN && (fp->fp_mant[0] & FP_QUIETBIT) == 0) { 406 /* 407 * Input is a signalling NaN. All operations that return 408 * an input NaN operand put it through a ``NaN conversion'', 409 * which basically just means ``turn on the quiet bit''. 410 * We do this here so that all NaNs internally look quiet 411 * (we can tell signalling ones by their class). 412 */ 413 fp->fp_mant[0] |= FP_QUIETBIT; 414 fe->fe_cx = FPSCR_VXSNAN; /* assert invalid operand */ 415 s = FPC_SNAN; 416 } 417 fp->fp_class = s; 418 419 return (0); 420 } 421 422 void 423 spe_handle_fpdata(struct trapframe *frame) 424 { 425 struct fpemu fpemu; 426 struct fpn *result; 427 uint32_t instr, instr_sec_op; 428 uint32_t cr_shift, ra, rb, rd, src; 429 uint32_t high, low, res; /* For vector operations. */ 430 uint32_t spefscr = 0; 431 uint32_t ftod_res[2]; 432 int width; /* Single, Double, Vector, Integer */ 433 int err; 434 435 err = fueword32((void *)frame->srr0, &instr); 436 437 if (err != 0) 438 return; 439 /* Fault. */; 440 441 if ((instr >> OPC_SHIFT) != SPE_OPC) 442 return; 443 444 /* 445 * 'cr' field is the upper 3 bits of rd. Magically, since a) rd is 5 446 * bits, b) each 'cr' field is 4 bits, and c) Only the 'GT' bit is 447 * modified for most compare operations, the full value of rd can be 448 * used as a shift value. 449 */ 450 rd = (instr >> 21) & 0x1f; 451 ra = (instr >> 16) & 0x1f; 452 rb = (instr >> 11) & 0x1f; 453 src = (instr >> 5) & 0x7; 454 cr_shift = 28 - (rd & 0x1f); 455 456 instr_sec_op = (instr & 0x7ff); 457 458 memset(&fpemu, 0, sizeof(fpemu)); 459 460 width = NONE; 461 switch (src) { 462 case SPE: 463 save_vec_nodrop(curthread); 464 switch (instr_sec_op) { 465 case EVFSABS: 466 curthread->td_pcb->pcb_vec.vr[rd][0] = 467 curthread->td_pcb->pcb_vec.vr[ra][0] & ~(1U << 31); 468 frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31); 469 break; 470 case EVFSNABS: 471 curthread->td_pcb->pcb_vec.vr[rd][0] = 472 curthread->td_pcb->pcb_vec.vr[ra][0] | (1U << 31); 473 frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31); 474 break; 475 case EVFSNEG: 476 curthread->td_pcb->pcb_vec.vr[rd][0] = 477 curthread->td_pcb->pcb_vec.vr[ra][0] ^ (1U << 31); 478 frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31); 479 break; 480 default: 481 /* High word */ 482 spe_explode(&fpemu, &fpemu.fe_f1, SINGLE, 483 curthread->td_pcb->pcb_vec.vr[ra][0], 0); 484 spe_explode(&fpemu, &fpemu.fe_f2, SINGLE, 485 curthread->td_pcb->pcb_vec.vr[rb][0], 0); 486 high = spe_emu_instr(instr_sec_op, &fpemu, &result, 487 &curthread->td_pcb->pcb_vec.vr[rd][0]); 488 489 spefscr = fpscr_to_spefscr(fpemu.fe_cx) << 16; 490 /* Clear the fpemu to start over on the lower bits. */ 491 memset(&fpemu, 0, sizeof(fpemu)); 492 493 /* Now low word */ 494 spe_explode(&fpemu, &fpemu.fe_f1, SINGLE, 495 frame->fixreg[ra], 0); 496 spe_explode(&fpemu, &fpemu.fe_f2, SINGLE, 497 frame->fixreg[rb], 0); 498 spefscr |= fpscr_to_spefscr(fpemu.fe_cx); 499 low = spe_emu_instr(instr_sec_op, &fpemu, &result, 500 &frame->fixreg[rd]); 501 if (instr_sec_op == EVFSCMPEQ || 502 instr_sec_op == EVFSCMPGT || 503 instr_sec_op == EVFSCMPLT) { 504 res = (high << 3) | (low << 2) | 505 ((high | low) << 1) | (high & low); 506 width = NONE; 507 } else 508 width = VECTOR; 509 break; 510 } 511 enable_vec(curthread); 512 goto end; 513 514 case SPFP: 515 switch (instr_sec_op) { 516 case EFSABS: 517 frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31); 518 break; 519 case EFSNABS: 520 frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31); 521 break; 522 case EFSNEG: 523 frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31); 524 break; 525 case EFSCFD: 526 spe_explode(&fpemu, &fpemu.fe_f3, DOUBLE, 527 curthread->td_pcb->pcb_vec.vr[rb][0], 528 frame->fixreg[rb]); 529 result = &fpemu.fe_f3; 530 width = SINGLE; 531 break; 532 default: 533 spe_explode(&fpemu, &fpemu.fe_f1, SINGLE, 534 frame->fixreg[ra], 0); 535 spe_explode(&fpemu, &fpemu.fe_f2, SINGLE, 536 frame->fixreg[rb], 0); 537 width = SINGLE; 538 } 539 break; 540 case DPFP: 541 save_vec_nodrop(curthread); 542 switch (instr_sec_op) { 543 case EFDABS: 544 curthread->td_pcb->pcb_vec.vr[rd][0] = 545 curthread->td_pcb->pcb_vec.vr[ra][0] & ~(1U << 31); 546 break; 547 case EFDNABS: 548 curthread->td_pcb->pcb_vec.vr[rd][0] = 549 curthread->td_pcb->pcb_vec.vr[ra][0] | (1U << 31); 550 break; 551 case EFDNEG: 552 curthread->td_pcb->pcb_vec.vr[rd][0] = 553 curthread->td_pcb->pcb_vec.vr[ra][0] ^ (1U << 31); 554 break; 555 case EFDCFS: 556 spe_explode(&fpemu, &fpemu.fe_f3, SINGLE, 557 frame->fixreg[rb], 0); 558 result = &fpemu.fe_f3; 559 width = DOUBLE; 560 break; 561 default: 562 spe_explode(&fpemu, &fpemu.fe_f1, DOUBLE, 563 curthread->td_pcb->pcb_vec.vr[ra][0], 564 frame->fixreg[ra]); 565 spe_explode(&fpemu, &fpemu.fe_f2, DOUBLE, 566 curthread->td_pcb->pcb_vec.vr[rb][0], 567 frame->fixreg[rb]); 568 width = DOUBLE; 569 } 570 break; 571 } 572 switch (instr_sec_op) { 573 case EFDCFS: 574 case EFSCFD: 575 /* Already handled. */ 576 break; 577 default: 578 res = spe_emu_instr(instr_sec_op, &fpemu, &result, 579 &frame->fixreg[rd]); 580 if (res != -1) 581 res <<= 2; 582 break; 583 } 584 585 switch (instr_sec_op & SPE_INST_MASK) { 586 case ECMPEQ: 587 case ECMPGT: 588 case ECMPLT: 589 frame->cr &= ~(0xf << cr_shift); 590 frame->cr |= (res << cr_shift); 591 break; 592 case ECTUI: 593 case ECTUIZ: 594 case ECTSI: 595 case ECTSIZ: 596 break; 597 default: 598 switch (width) { 599 case NONE: 600 case VECTOR: 601 break; 602 case SINGLE: 603 frame->fixreg[rd] = fpu_ftos(&fpemu, result); 604 break; 605 case DOUBLE: 606 curthread->td_pcb->pcb_vec.vr[rd][0] = 607 fpu_ftod(&fpemu, result, ftod_res); 608 frame->fixreg[rd] = ftod_res[1]; 609 enable_vec(curthread); 610 break; 611 default: 612 panic("Unknown storage width %d", width); 613 break; 614 } 615 } 616 617 end: 618 spefscr |= (mfspr(SPR_SPEFSCR) & ~SPEFSCR_FINVS); 619 mtspr(SPR_SPEFSCR, spefscr); 620 frame->srr0 += 4; 621 622 return; 623 } 624 625 void 626 spe_handle_fpround(struct trapframe *frame) 627 { 628 629 /* 630 * Punt fpround exceptions for now. This leaves the truncated result in 631 * the register. We'll deal with overflow/underflow later. 632 */ 633 return; 634 } 635