1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. 
 */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
/*	All Rights Reserved */

/* Copyright (c) 1987, 1988 Microsoft Corporation */
/*	All Rights Reserved */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/signal.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/trap.h>
#include <sys/fault.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/pcb.h>
#include <sys/lwp.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/disp.h>
#include <sys/fp.h>
#include <sys/siginfo.h>
#include <sys/archsystm.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/x86_archext.h>
#include <sys/sysmacros.h>

/*
 * Force 16-byte alignment on the fxsave-format template below; the
 * fxsave/fxrstor save area must be 16-byte aligned (see the alignment
 * ASSERT in fpnoextflt()).
 */
/*CSTYLED*/
#pragma align 16 (sse_initial)

/*
 * Initial kfpu state for SSE/SSE2 used by fpinit()
 */
const struct fxsave_state sse_initial = {
	FPU_CW_INIT,	/* fx_fcw */
	0,		/* fx_fsw */
	0,		/* fx_fctw */
	0,		/* fx_fop */
#if defined(__amd64)
	0,		/* fx_rip */
	0,		/* fx_rdp */
#else
	0,		/* fx_eip */
	0,		/* fx_cs */
	0,		/* __fx_ign0 */
	0,		/* fx_dp */
	0,		/* fx_ds */
	0,		/* __fx_ign1 */
#endif /* __amd64 */
	SSE_MXCSR_INIT	/* fx_mxcsr */
	/* rest of structure is zero */
};

/*
 * mxcsr_mask value (possibly reset in fpu_probe); used to avoid
 * the #gp exception caused by setting unsupported bits in the
 * MXCSR register
 */
uint32_t sse_mxcsr_mask = SSE_MXCSR_MASK_DEFAULT;

/*
 * Initial kfpu state for x87 used by fpinit()
 */
const struct fnsave_state x87_initial = {
	FPU_CW_INIT,	/* f_fcw */
	0,		/* __f_ign0 */
	0,		/* f_fsw */
	0,		/* __f_ign1 */
	0xffff,		/* f_ftw (all tags "empty") */
	/* rest of structure is zero */
};

#if defined(__amd64)
/*
 * On amd64 the fxsave form is always available, so the context-save
 * entry point is statically fpxsave_ctxt.
 */
#define	fpsave_ctxt	fpxsave_ctxt
#elif defined(__i386)
/*
 * This vector
is patched to fpxsave_ctxt() if we discover
 * we have an SSE-capable chip in fpu_probe().
 */
void (*fpsave_ctxt)(void *) = fpnsave_ctxt;
#endif

static int fpe_sicode(uint_t);
static int fpe_simd_sicode(uint_t);

/*
 * Copy the state of parent lwp's floating point context into the new lwp.
 * Invoked for both fork() and lwp_create().
 *
 * Note that we inherit -only- the control state (e.g. exception masks,
 * rounding, precision control, etc.); the FPU registers are otherwise
 * reset to their initial state.
 */
static void
fp_new_lwp(kthread_id_t t, kthread_id_t ct)
{
	struct fpu_ctx *fp;		/* parent fpu context */
	struct fpu_ctx *cfp;		/* new fpu context */
	struct fxsave_state *fx, *cfx;

	ASSERT(fp_kind != FP_NO);

	fp = &t->t_lwp->lwp_pcb.pcb_fpu;
	cfp = &ct->t_lwp->lwp_pcb.pcb_fpu;

	/*
	 * If the parent FPU state is still in the FPU hw then save it;
	 * conveniently, fp_save() already does this for us nicely.
	 */
	fp_save(fp);

	/*
	 * The child starts with a valid in-memory image (FPU_VALID), so
	 * its first FPU use restores from it; clear any stale status words.
	 */
	cfp->fpu_flags = FPU_EN | FPU_VALID;
	cfp->fpu_regs.kfpu_status = 0;
	cfp->fpu_regs.kfpu_xstatus = 0;

#if defined(__amd64)
	fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
	cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx;
	bcopy(&sse_initial, cfx, sizeof (*cfx));
	/* inherit control state only: fcw, and mxcsr minus sticky flags */
	cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
	cfx->fx_fcw = fx->fx_fcw;
#else
	if (fp_kind == __FP_SSE) {
		fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
		cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx;
		bcopy(&sse_initial, cfx, sizeof (*cfx));
		/* inherit control state only, not pending exception flags */
		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
		cfx->fx_fcw = fx->fx_fcw;
	} else {
		struct fnsave_state *fn = &fp->fpu_regs.kfpu_u.kfpu_fn;
		struct fnsave_state *cfn = &cfp->fpu_regs.kfpu_u.kfpu_fn;

		bcopy(&x87_initial, cfn, sizeof (*cfn));
		cfn->f_fcw = fn->f_fcw;		/* control word only */
	}
#endif
	installctx(ct, cfp,
	    fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
	/*
	 * Now, when the new lwp starts running, it will take a trap
	 * that will be handled inline in the trap table to cause
	 * the appropriate f*rstor instruction to load the save area we
	 * constructed above directly into the hardware.
	 */
}

/*
 * Free any state associated with floating point context.
 * Fp_free can be called in three cases:
 * 1) from reaper -> thread_free -> ctxfree -> fp_free
 *	fp context belongs to a thread on deathrow
 *	nothing to do, thread will never be resumed
 *	thread calling ctxfree is reaper
 *
 * 2) from exec -> ctxfree -> fp_free
 *	fp context belongs to the current thread
 *	must disable fpu, thread calling ctxfree is curthread
 *
 * 3) from restorecontext -> setfpregs -> fp_free
 *	we have a modified context in the memory (lwp->pcb_fpu)
 *	disable fpu and release the fp context for the CPU
 */
/*ARGSUSED*/
void
fp_free(struct fpu_ctx *fp, int isexec)
{
	ASSERT(fp_kind != FP_NO);

	/*
	 * State already saved to memory (FPU_VALID set): nothing live in
	 * the hardware to release.
	 */
	if (fp->fpu_flags & FPU_VALID)
		return;

	kpreempt_disable();
	/*
	 * We want to do fpsave rather than fpdisable so that we can
	 * keep the fpu_flags as FPU_VALID tracking the CR0_TS bit
	 */
	fp->fpu_flags |= FPU_VALID;
	/* If for current thread disable FP to track FPU_VALID */
	if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) {
		/* Clear errors if any to prevent frstor from complaining */
		(void) fperr_reset();
		if (fp_kind == __FP_SSE)
			(void) fpxerr_reset();
		fpdisable();
	}
	kpreempt_enable();
}

/*
 * Store the floating point state and disable the floating point unit.
 */
void
fp_save(struct fpu_ctx *fp)
{
	ASSERT(fp_kind != FP_NO);

	kpreempt_disable();
	/* nothing to do if there is no context, or it is already saved */
	if (!fp || fp->fpu_flags & FPU_VALID) {
		kpreempt_enable();
		return;
	}
	ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu);

#if defined(__amd64)
	fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx);
#else
	switch (fp_kind) {
	case __FP_SSE:
		fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx);
		break;
	default:
		fpsave(&fp->fpu_regs.kfpu_u.kfpu_fn);
		break;
	}
#endif
	/* the in-memory image is now the authoritative copy */
	fp->fpu_flags |= FPU_VALID;
	kpreempt_enable();
}

/*
 * Restore the FPU context for the thread:
 * The possibilities are:
 *	1. No active FPU context: Load the new context into the FPU hw
 *	   and enable the FPU.
 * NOTE(review): this list appears truncated in the original; the
 * "case 2" marker below suggests a second (SSE vs. x87) case was meant.
 */
void
fp_restore(struct fpu_ctx *fp)
{
#if defined(__amd64)
	fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx);
#else
	/* case 2 */
	if (fp_kind == __FP_SSE)
		fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx);
	else
		fprestore(&fp->fpu_regs.kfpu_u.kfpu_fn);
#endif
	/* hardware now holds the live state; the memory image is stale */
	fp->fpu_flags &= ~FPU_VALID;
}


/*
 * Seeds the initial state for the current thread.  The possibilities are:
 *	1. Another process has modified the FPU state before we have done any
 *	   initialization: Load the FPU state from the LWP state.
 *	2. The FPU state has not been externally modified:  Load a clean state.
 */
static void
fp_seed(void)
{
	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

	/* caller must have disabled preemption and not yet enabled FPU */
	ASSERT(curthread->t_preempt >= 1);
	ASSERT((fp->fpu_flags & FPU_EN) == 0);

	/*
	 * Always initialize a new context and initialize the hardware.
	 */
	installctx(curthread, fp,
	    fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
	fpinit();

	/*
	 * If FPU_VALID is set, it means someone has modified registers via
	 * /proc.  In this case, restore the current lwp's state.
	 */
	if (fp->fpu_flags & FPU_VALID)
		fp_restore(fp);

	ASSERT((fp->fpu_flags & FPU_VALID) == 0);
	fp->fpu_flags = FPU_EN;
}

/*
 * This routine is called from trap() when User thread takes No Extension
 * Fault.  The possibilities are:
 *	1. User thread has executed a FP instruction for the first time.
 *	   Save current FPU context if any.  Initialize FPU, setup FPU
 *	   context for the thread and enable FP hw.
 *	2. Thread's pcb has a valid FPU state: Restore the FPU state and
 *	   enable FP hw.
 *
 * Note that case #2 is inlined in the trap table.
 */
int
fpnoextflt(struct regs *rp)
{
	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

#if !defined(__lint)
	/* sanity-check the layout of the hardware save formats */
	ASSERT(sizeof (struct fxsave_state) == 512 &&
	    sizeof (struct fnsave_state) == 108);
	ASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0);
#if defined(__i386)
	ASSERT(sizeof (struct fpu) == sizeof (struct __old_fpu));
#endif	/* __i386 */
#endif	/* !__lint */

	/*
	 * save area MUST be 16-byte aligned, else will page fault
	 */
	ASSERT(((uintptr_t)(&fp->fpu_regs.kfpu_u.kfpu_fx) & 0xf) == 0);

	kpreempt_disable();
	/*
	 * Now we can enable the interrupts.
	 * (NOTE: fp-no-coprocessor comes thru interrupt gate)
	 */
	sti();

	if (!fpu_exists) {	/* check for FPU hw exists */
		if (fp_kind == FP_NO) {
			uint32_t inst;

			/*
			 * When the system has no floating point support,
			 * i.e. no FP hardware and no emulator, skip the
			 * two kinds of FP instruction that occur in
			 * fpstart.  Allows processes that do no real FP
			 * to run normally.
			 */
			if (fuword32((void *)rp->r_pc, &inst) != -1 &&
			    ((inst & 0xFFFF) == 0x7dd9 ||
			    (inst & 0xFFFF) == 0x6dd9)) {
				rp->r_pc += 3;
				kpreempt_enable();
				return (0);
			}
		}

		/*
		 * If we have neither a processor extension nor
		 * an emulator, kill the process OR panic the kernel.
		 */
		kpreempt_enable();
		return (1);	/* error */
	}

#if !defined(__xpv)	/* XXPV	Is this ifdef needed now? */
	/*
	 * A paranoid cross-check: for the SSE case, ensure that %cr4 is
	 * configured to enable fully fledged (%xmm) fxsave/fxrestor on
	 * this CPU.  For the non-SSE case, ensure that it isn't.
	 */
	ASSERT((fp_kind == __FP_SSE &&
	    (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) ||
	    (fp_kind != __FP_SSE &&
	    (getcr4() & (CR4_OSXMMEXCPT|CR4_OSFXSR)) == 0));
#endif

	if (fp->fpu_flags & FPU_EN) {
		/* case 2 */
		fp_restore(fp);
	} else {
		/* case 1 */
		fp_seed();
	}
	kpreempt_enable();
	return (0);
}


/*
 * Handle a processor extension overrun fault
 * Returns non zero for error.
 *
 * XXX	Shouldn't this just be abolished given that we're not supporting
 *	anything prior to Pentium?
 */

/* ARGSUSED */
int
fpextovrflt(struct regs *rp)
{
#if !defined(__xpv)	/* XXPV	Do we need this ifdef either */
	ulong_t cur_cr0;

	ASSERT(fp_kind != FP_NO);

	/* save %cr0, re-initialize the FPU hardware, then restore %cr0 */
	cur_cr0 = getcr0();
	fpinit();		/* initialize the FPU hardware */
	setcr0(cur_cr0);
#endif
	sti();
	return (1);		/* error, send SIGSEGV signal to the thread */
}

/*
 * Handle a processor extension error fault
 * Returns non zero for error.
 */

/*ARGSUSED*/
int
fpexterrflt(struct regs *rp)
{
	uint32_t fpcw, fpsw;
	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

	ASSERT(fp_kind != FP_NO);

	/*
	 * Now we can enable the interrupts.
	 * (NOTE: x87 fp exceptions come thru interrupt gate)
	 */
	sti();

	if (!fpu_exists)
		return (FPE_FLTINV);

	/*
	 * Do an unconditional save of the FP state.  If it's dirty (TS=0),
	 * it'll be saved into the fpu context area passed in (that of the
	 * current thread).
If it's not dirty (it may not be, due to
	 * an intervening save due to a context switch between the sti(),
	 * above and here, then it's safe to just use the stored values in
	 * the context save area to determine the cause of the fault.
	 */
	fp_save(fp);

	/* clear exception flags in saved state, as if by fnclex */
#if defined(__amd64)
	fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
	fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw;
	fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
#else
	switch (fp_kind) {
	case __FP_SSE:
		fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
		fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw;
		fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
		break;
	default:
		fpsw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw;
		fpcw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw;
		fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw &= ~FPS_SW_EFLAGS;
		break;
	}
#endif

	/* record the status word as it stood at the time of the fault */
	fp->fpu_regs.kfpu_status = fpsw;

	if ((fpsw & FPS_ES) == 0)
		return (0);	/* No exception */

	/*
	 * "and" the exception flags with the complement of the mask
	 * bits to determine which exception occurred
	 */
	return (fpe_sicode(fpsw & ~fpcw & 0x3f));
}

/*
 * Handle an SSE/SSE2 precise exception.
 * Returns a non-zero sicode for error.
 */
/*ARGSUSED*/
int
fpsimderrflt(struct regs *rp)
{
	uint32_t mxcsr, xmask;
	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

	ASSERT(fp_kind == __FP_SSE);

	/*
	 * NOTE: Interrupts are disabled during execution of this
	 *	function.  They are enabled by the caller in trap.c.
	 */

	/*
	 * The only way we could have gotten here if there is no FP unit
	 * is via a user executing an INT $19 instruction, so there is
	 * no fault in that case.
	 */
	if (!fpu_exists)
		return (0);

	/*
	 * Do an unconditional save of the FP state.
If it's dirty (TS=0),
	 * it'll be saved into the fpu context area passed in (that of the
	 * current thread).  If it's not dirty, then it's safe to just use
	 * the stored values in the context save area to determine the
	 * cause of the fault.
	 */
	fp_save(fp);		/* save the FPU state */

	mxcsr = fp->fpu_regs.kfpu_u.kfpu_fx.fx_mxcsr;

	/* record both the x87 status word and mxcsr at fault time */
	fp->fpu_regs.kfpu_status = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;

	fp->fpu_regs.kfpu_xstatus = mxcsr;

	/*
	 * compute the mask that determines which conditions can cause
	 * a #xm exception, and use this to clean the status bits so that
	 * we can identify the true cause of this one.
	 * (the mask bits sit 7 positions above the matching flag bits)
	 */
	xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS;
	return (fpe_simd_sicode((mxcsr & SSE_MXCSR_EFLAGS) & ~xmask));
}

/*
 * In the unlikely event that someone is relying on this subcode being
 * FPE_FLTILL for denormalize exceptions, it can always be patched back
 * again to restore old behaviour.
 */
int fpe_fltden = FPE_FLTDEN;

/*
 * Map from the FPU status word to the FP exception si_code.
 * Bits are tested in priority order; invalid-op wins, and anything
 * unrecognized falls back to FPE_FLTINV.
 */
static int
fpe_sicode(uint_t sw)
{
	if (sw & FPS_IE)
		return (FPE_FLTINV);
	if (sw & FPS_ZE)
		return (FPE_FLTDIV);
	if (sw & FPS_DE)
		return (fpe_fltden);	/* patchable; see fpe_fltden above */
	if (sw & FPS_OE)
		return (FPE_FLTOVF);
	if (sw & FPS_UE)
		return (FPE_FLTUND);
	if (sw & FPS_PE)
		return (FPE_FLTRES);
	return (FPE_FLTINV);	/* default si_code for other exceptions */
}

/*
 * Map from the SSE status word to the FP exception si_code.
 */
static int
fpe_simd_sicode(uint_t sw)
{
	if (sw & SSE_IE)
		return (FPE_FLTINV);
	if (sw & SSE_ZE)
		return (FPE_FLTDIV);
	if (sw & SSE_DE)
		return (FPE_FLTDEN);
	if (sw & SSE_OE)
		return (FPE_FLTOVF);
	if (sw & SSE_UE)
		return (FPE_FLTUND);
	if (sw & SSE_PE)
		return (FPE_FLTRES);
	return (FPE_FLTINV);	/* default si_code for other exceptions */
}

/*
 * This routine is invoked as part of libc's __fpstart implementation
 * via sysi86(2).
 *
 * It may be called -before- any context has been assigned in which case
 * we try and avoid touching the hardware.  Or it may be invoked well
 * after the context has been assigned and fiddled with, in which case
 * just tweak it directly.
 */
void
fpsetcw(uint16_t fcw, uint32_t mxcsr)
{
	struct fpu_ctx *fp = &curthread->t_lwp->lwp_pcb.pcb_fpu;
	struct fxsave_state *fx;

	/* no FP hardware at all: nothing to tweak */
	if (!fpu_exists || fp_kind == FP_NO)
		return;

	if ((fp->fpu_flags & FPU_EN) == 0) {
		if (fcw == FPU_CW_INIT && mxcsr == SSE_MXCSR_INIT) {
			/*
			 * Common case.  Floating point unit not yet
			 * enabled, and kernel already intends to initialize
			 * the hardware the way the caller wants.
			 */
			return;
		}
		/*
		 * Hmm.  Userland wants a different default.
		 * Do a fake "first trap" to establish the context, then
		 * handle as if we already had a context before we came in.
		 */
		kpreempt_disable();
		fp_seed();
		kpreempt_enable();
	}

	/*
	 * Ensure that the current hardware state is flushed back to the
	 * pcb, then modify that copy.  Next use of the fp will
	 * restore the context.
	 */
	fp_save(fp);

#if defined(__amd64)
	fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
	fx->fx_fcw = fcw;
	/* mask off mxcsr bits unsupported by this CPU to avoid a #gp */
	fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
#else
	switch (fp_kind) {
	case __FP_SSE:
		fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
		fx->fx_fcw = fcw;
		/* mask off mxcsr bits unsupported by this CPU (#gp) */
		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
		break;
	default:
		/* x87-only hardware: there is no mxcsr to set */
		fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw = fcw;
		break;
	}
#endif
}