/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
*/ 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ 28 /* All Rights Reserved */ 29 30 /* Copyright (c) 1987, 1988 Microsoft Corporation */ 31 /* All Rights Reserved */ 32 33 #pragma ident "%Z%%M% %I% %E% SMI" 34 35 #include <sys/types.h> 36 #include <sys/param.h> 37 #include <sys/signal.h> 38 #include <sys/regset.h> 39 #include <sys/privregs.h> 40 #include <sys/psw.h> 41 #include <sys/trap.h> 42 #include <sys/fault.h> 43 #include <sys/systm.h> 44 #include <sys/user.h> 45 #include <sys/file.h> 46 #include <sys/proc.h> 47 #include <sys/pcb.h> 48 #include <sys/lwp.h> 49 #include <sys/cpuvar.h> 50 #include <sys/thread.h> 51 #include <sys/disp.h> 52 #include <sys/fp.h> 53 #include <sys/siginfo.h> 54 #include <sys/archsystm.h> 55 #include <sys/kmem.h> 56 #include <sys/debug.h> 57 #include <sys/x86_archext.h> 58 #include <sys/sysmacros.h> 59 60 /*CSTYLED*/ 61 #pragma align 16 (sse_initial) 62 63 /* 64 * Initial kfpu state for SSE/SSE2 used by fpinit() 65 */ 66 const struct fxsave_state sse_initial = { 67 FPU_CW_INIT, /* fx_fcw */ 68 0, /* fx_fsw */ 69 0, /* fx_fctw */ 70 0, /* fx_fop */ 71 #if defined(__amd64) 72 0, /* fx_rip */ 73 0, /* fx_rdp */ 74 #else 75 0, /* fx_eip */ 76 0, /* fx_cs */ 77 0, /* __fx_ign0 */ 78 0, /* fx_dp */ 79 0, /* fx_ds */ 80 0, /* __fx_ign1 */ 81 #endif /* __amd64 */ 82 SSE_MXCSR_INIT /* fx_mxcsr */ 83 /* rest of structure is zero */ 84 }; 85 86 /* 87 * mxcsr_mask value (possibly reset in fpu_probe); used to avoid 88 * the #gp exception caused by setting unsupported bits in the 89 * MXCSR register 90 */ 91 uint32_t sse_mxcsr_mask = SSE_MXCSR_MASK_DEFAULT; 92 93 /* 94 * Initial kfpu state for x87 used by fpinit() 95 */ 96 const struct fnsave_state x87_initial = { 97 FPU_CW_INIT, /* f_fcw */ 98 0, /* __f_ign0 */ 99 0, /* f_fsw */ 100 0, /* __f_ign1 */ 101 0xffff, /* f_ftw */ 102 /* rest of structure is zero */ 103 }; 104 105 #if defined(__amd64) 106 #define fpsave_ctxt fpxsave_ctxt 107 #elif defined(__i386) 108 /* 109 * This vector 
is patched to fpxsave_ctxt() if we discover 110 * we have an SSE-capable chip in fpu_probe(). 111 */ 112 void (*fpsave_ctxt)(void *) = fpnsave_ctxt; 113 #endif 114 115 static int fpe_sicode(uint_t); 116 static int fpe_simd_sicode(uint_t); 117 118 /* 119 * Copy the state of parent lwp's floating point context into the new lwp. 120 * Invoked for both fork() and lwp_create(). 121 * 122 * Note that we inherit -only- the control state (e.g. exception masks, 123 * rounding, precision control, etc.); the FPU registers are otherwise 124 * reset to their initial state. 125 */ 126 static void 127 fp_new_lwp(kthread_id_t t, kthread_id_t ct) 128 { 129 struct fpu_ctx *fp; /* parent fpu context */ 130 struct fpu_ctx *cfp; /* new fpu context */ 131 struct fxsave_state *fx, *cfx; 132 133 ASSERT(fp_kind != FP_NO); 134 135 fp = &t->t_lwp->lwp_pcb.pcb_fpu; 136 cfp = &ct->t_lwp->lwp_pcb.pcb_fpu; 137 138 /* 139 * If the parent FPU state is still in the FPU hw then save it; 140 * conveniently, fp_save() already does this for us nicely. 
141 */ 142 fp_save(fp); 143 144 cfp->fpu_flags = FPU_EN | FPU_VALID; 145 cfp->fpu_regs.kfpu_status = 0; 146 cfp->fpu_regs.kfpu_xstatus = 0; 147 148 #if defined(__amd64) 149 fx = &fp->fpu_regs.kfpu_u.kfpu_fx; 150 cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx; 151 bcopy(&sse_initial, cfx, sizeof (*cfx)); 152 cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS; 153 cfx->fx_fcw = fx->fx_fcw; 154 #else 155 if (fp_kind == __FP_SSE) { 156 fx = &fp->fpu_regs.kfpu_u.kfpu_fx; 157 cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx; 158 bcopy(&sse_initial, cfx, sizeof (*cfx)); 159 cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS; 160 cfx->fx_fcw = fx->fx_fcw; 161 } else { 162 struct fnsave_state *fn = &fp->fpu_regs.kfpu_u.kfpu_fn; 163 struct fnsave_state *cfn = &cfp->fpu_regs.kfpu_u.kfpu_fn; 164 165 bcopy(&x87_initial, cfn, sizeof (*cfn)); 166 cfn->f_fcw = fn->f_fcw; 167 } 168 #endif 169 installctx(ct, cfp, 170 fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free); 171 /* 172 * Now, when the new lwp starts running, it will take a trap 173 * that will be handled inline in the trap table to cause 174 * the appropriate f*rstor instruction to load the save area we 175 * constructed above directly into the hardware. 176 */ 177 } 178 179 /* 180 * Free any state associated with floating point context. 
181 * Fp_free can be called in three cases: 182 * 1) from reaper -> thread_free -> ctxfree -> fp_free 183 * fp context belongs to a thread on deathrow 184 * nothing to do, thread will never be resumed 185 * thread calling ctxfree is reaper 186 * 187 * 2) from exec -> ctxfree -> fp_free 188 * fp context belongs to the current thread 189 * must disable fpu, thread calling ctxfree is curthread 190 * 191 * 3) from restorecontext -> setfpregs -> fp_free 192 * we have a modified context in the memory (lwp->pcb_fpu) 193 * disable fpu and release the fp context for the CPU 194 * 195 */ 196 /*ARGSUSED*/ 197 void 198 fp_free(struct fpu_ctx *fp, int isexec) 199 { 200 ASSERT(fp_kind != FP_NO); 201 202 if (fp->fpu_flags & FPU_VALID) 203 return; 204 205 kpreempt_disable(); 206 /* 207 * We want to do fpsave rather than fpdisable so that we can 208 * keep the fpu_flags as FPU_VALID tracking the CR0_TS bit 209 */ 210 fp->fpu_flags |= FPU_VALID; 211 /* If for current thread disable FP to track FPU_VALID */ 212 if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) { 213 /* Clear errors if any to prevent frstor from complaining */ 214 (void) fperr_reset(); 215 if (fp_kind == __FP_SSE) 216 (void) fpxerr_reset(); 217 fpdisable(); 218 } 219 kpreempt_enable(); 220 } 221 222 /* 223 * Store the floating point state and disable the floating point unit. 
224 */ 225 void 226 fp_save(struct fpu_ctx *fp) 227 { 228 ASSERT(fp_kind != FP_NO); 229 230 kpreempt_disable(); 231 if (!fp || fp->fpu_flags & FPU_VALID) { 232 kpreempt_enable(); 233 return; 234 } 235 ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu); 236 237 #if defined(__amd64) 238 fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx); 239 #else 240 switch (fp_kind) { 241 case __FP_SSE: 242 fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx); 243 break; 244 default: 245 fpsave(&fp->fpu_regs.kfpu_u.kfpu_fn); 246 break; 247 } 248 #endif 249 fp->fpu_flags |= FPU_VALID; 250 kpreempt_enable(); 251 } 252 253 /* 254 * Restore the FPU context for the thread: 255 * The possibilities are: 256 * 1. No active FPU context: Load the new context into the FPU hw 257 * and enable the FPU. 258 */ 259 void 260 fp_restore(struct fpu_ctx *fp) 261 { 262 #if defined(__amd64) 263 fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx); 264 #else 265 /* case 2 */ 266 if (fp_kind == __FP_SSE) 267 fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx); 268 else 269 fprestore(&fp->fpu_regs.kfpu_u.kfpu_fn); 270 #endif 271 fp->fpu_flags &= ~FPU_VALID; 272 } 273 274 275 /* 276 * Seeds the initial state for the current thread. The possibilities are: 277 * 1. Another process has modified the FPU state before we have done any 278 * initialization: Load the FPU state from the LWP state. 279 * 2. The FPU state has not been externally modified: Load a clean state. 280 */ 281 static void 282 fp_seed(void) 283 { 284 struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; 285 286 ASSERT(curthread->t_preempt >= 1); 287 ASSERT((fp->fpu_flags & FPU_EN) == 0); 288 289 /* 290 * Always initialize a new context and initialize the hardware. 291 */ 292 installctx(curthread, fp, 293 fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free); 294 fpinit(); 295 296 /* 297 * If FPU_VALID is set, it means someone has modified registers via 298 * /proc. In this case, restore the current lwp's state. 
299 */ 300 if (fp->fpu_flags & FPU_VALID) 301 fp_restore(fp); 302 303 ASSERT((fp->fpu_flags & FPU_VALID) == 0); 304 fp->fpu_flags = FPU_EN; 305 } 306 307 /* 308 * This routine is called from trap() when User thread takes No Extension 309 * Fault. The possiblities are: 310 * 1. User thread has executed a FP instruction for the first time. 311 * Save current FPU context if any. Initialize FPU, setup FPU 312 * context for the thread and enable FP hw. 313 * 2. Thread's pcb has a valid FPU state: Restore the FPU state and 314 * enable FP hw. 315 * 316 * Note that case #2 is inlined in the trap table. 317 */ 318 int 319 fpnoextflt(struct regs *rp) 320 { 321 struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; 322 323 #if !defined(__lint) 324 ASSERT(sizeof (struct fxsave_state) == 512 && 325 sizeof (struct fnsave_state) == 108); 326 ASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0); 327 #if defined(__i386) 328 ASSERT(sizeof (struct fpu) == sizeof (struct __old_fpu)); 329 #endif /* __i386 */ 330 #endif /* !__lint */ 331 332 /* 333 * save area MUST be 16-byte aligned, else will page fault 334 */ 335 ASSERT(((uintptr_t)(&fp->fpu_regs.kfpu_u.kfpu_fx) & 0xf) == 0); 336 337 kpreempt_disable(); 338 /* 339 * Now we can enable the interrupts. 340 * (NOTE: fp-no-coprocessor comes thru interrupt gate) 341 */ 342 sti(); 343 344 if (!fpu_exists) { /* check for FPU hw exists */ 345 if (fp_kind == FP_NO) { 346 uint32_t inst; 347 348 /* 349 * When the system has no floating point support, 350 * i.e. no FP hardware and no emulator, skip the 351 * two kinds of FP instruction that occur in 352 * fpstart. Allows processes that do no real FP 353 * to run normally. 354 */ 355 if (fuword32((void *)rp->r_pc, &inst) != -1 && 356 ((inst & 0xFFFF) == 0x7dd9 || 357 (inst & 0xFFFF) == 0x6dd9)) { 358 rp->r_pc += 3; 359 kpreempt_enable(); 360 return (0); 361 } 362 } 363 364 /* 365 * If we have neither a processor extension nor 366 * an emulator, kill the process OR panic the kernel. 
367 */ 368 kpreempt_enable(); 369 return (1); /* error */ 370 } 371 372 #if !defined(__xpv) /* XXPV Is this ifdef needed now? */ 373 /* 374 * A paranoid cross-check: for the SSE case, ensure that %cr4 is 375 * configured to enable fully fledged (%xmm) fxsave/fxrestor on 376 * this CPU. For the non-SSE case, ensure that it isn't. 377 */ 378 ASSERT((fp_kind == __FP_SSE && (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) || 379 (fp_kind != __FP_SSE && 380 (getcr4() & (CR4_OSXMMEXCPT|CR4_OSFXSR)) == 0)); 381 #endif 382 383 if (fp->fpu_flags & FPU_EN) { 384 /* case 2 */ 385 fp_restore(fp); 386 } else { 387 /* case 1 */ 388 fp_seed(); 389 } 390 kpreempt_enable(); 391 return (0); 392 } 393 394 395 /* 396 * Handle a processor extension overrun fault 397 * Returns non zero for error. 398 * 399 * XXX Shouldn't this just be abolished given that we're not supporting 400 * anything prior to Pentium? 401 */ 402 403 /* ARGSUSED */ 404 int 405 fpextovrflt(struct regs *rp) 406 { 407 #if !defined(__xpv) /* XXPV Do we need this ifdef either */ 408 ulong_t cur_cr0; 409 410 ASSERT(fp_kind != FP_NO); 411 412 cur_cr0 = getcr0(); 413 fpinit(); /* initialize the FPU hardware */ 414 setcr0(cur_cr0); 415 #endif 416 sti(); 417 return (1); /* error, send SIGSEGV signal to the thread */ 418 } 419 420 /* 421 * Handle a processor extension error fault 422 * Returns non zero for error. 423 */ 424 425 /*ARGSUSED*/ 426 int 427 fpexterrflt(struct regs *rp) 428 { 429 uint32_t fpcwsw; 430 fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; 431 432 ASSERT(fp_kind != FP_NO); 433 434 fpcwsw = fpgetcwsw(); 435 /* 436 * Now we can enable the interrupts. 
437 * (NOTE: x87 fp exceptions come thru interrupt gate) 438 */ 439 sti(); 440 441 if ((fpcwsw & FPS_ES) == 0) 442 return (0); /* No exception */ 443 444 if (fpu_exists) { 445 fp_save(fp); 446 /* clear exception flags in saved state, as if by fnclex */ 447 #if defined(__amd64) 448 fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS; 449 #else 450 switch (fp_kind) { 451 case __FP_SSE: 452 fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS; 453 break; 454 default: 455 fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw &= ~FPS_SW_EFLAGS; 456 break; 457 } 458 #endif 459 } 460 fp->fpu_regs.kfpu_status = fpcwsw & 0xffff; 461 /* 462 * "and" the exception flags with the complement of the mask 463 * bits to determine which exception occurred 464 */ 465 return (fpe_sicode(fpcwsw & ~(fpcwsw >> 16) & 0x3f)); 466 } 467 468 /* 469 * Handle an SSE/SSE2 precise exception. 470 * Returns a non-zero sicode for error. 471 */ 472 /*ARGSUSED*/ 473 int 474 fpsimderrflt(struct regs *rp) 475 { 476 uint32_t mxcsr, xmask; 477 fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; 478 479 ASSERT(fp_kind == __FP_SSE); 480 481 mxcsr = fpgetmxcsr(); 482 if (fpu_exists) { 483 fp_save(fp); /* save the FPU state */ 484 fp->fpu_regs.kfpu_status = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw; 485 } else { 486 fp->fpu_regs.kfpu_status = fpgetcwsw() & 0xffff; 487 } 488 fp->fpu_regs.kfpu_xstatus = mxcsr; 489 490 /* 491 * compute the mask that determines which conditions can cause 492 * a #xm exception, and use this to clean the status bits so that 493 * we can identify the true cause of this one. 494 */ 495 xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS; 496 return (fpe_simd_sicode((mxcsr & SSE_MXCSR_EFLAGS) & ~xmask)); 497 } 498 499 /* 500 * In the unlikely event that someone is relying on this subcode being 501 * FPE_FLTILL for denormalize exceptions, it can always be patched back 502 * again to restore old behaviour. 503 */ 504 int fpe_fltden = FPE_FLTDEN; 505 506 /* 507 * Map from the FPU status word to the FP exception si_code. 
508 */ 509 static int 510 fpe_sicode(uint_t sw) 511 { 512 if (sw & FPS_IE) 513 return (FPE_FLTINV); 514 if (sw & FPS_ZE) 515 return (FPE_FLTDIV); 516 if (sw & FPS_DE) 517 return (fpe_fltden); 518 if (sw & FPS_OE) 519 return (FPE_FLTOVF); 520 if (sw & FPS_UE) 521 return (FPE_FLTUND); 522 if (sw & FPS_PE) 523 return (FPE_FLTRES); 524 return (FPE_FLTINV); /* default si_code for other exceptions */ 525 } 526 527 /* 528 * Map from the SSE status word to the FP exception si_code. 529 */ 530 static int 531 fpe_simd_sicode(uint_t sw) 532 { 533 if (sw & SSE_IE) 534 return (FPE_FLTINV); 535 if (sw & SSE_ZE) 536 return (FPE_FLTDIV); 537 if (sw & SSE_DE) 538 return (FPE_FLTDEN); 539 if (sw & SSE_OE) 540 return (FPE_FLTOVF); 541 if (sw & SSE_UE) 542 return (FPE_FLTUND); 543 if (sw & SSE_PE) 544 return (FPE_FLTRES); 545 return (FPE_FLTINV); /* default si_code for other exceptions */ 546 } 547 548 /* 549 * This routine is invoked as part of libc's __fpstart implementation 550 * via sysi86(2). 551 * 552 * It may be called -before- any context has been assigned in which case 553 * we try and avoid touching the hardware. Or it may be invoked well 554 * after the context has been assigned and fiddled with, in which case 555 * just tweak it directly. 556 */ 557 void 558 fpsetcw(uint16_t fcw, uint32_t mxcsr) 559 { 560 struct fpu_ctx *fp = &curthread->t_lwp->lwp_pcb.pcb_fpu; 561 struct fxsave_state *fx; 562 563 if (!fpu_exists || fp_kind == FP_NO) 564 return; 565 566 if ((fp->fpu_flags & FPU_EN) == 0) { 567 if (fcw == FPU_CW_INIT && mxcsr == SSE_MXCSR_INIT) { 568 /* 569 * Common case. Floating point unit not yet 570 * enabled, and kernel already intends to initialize 571 * the hardware the way the caller wants. 572 */ 573 return; 574 } 575 /* 576 * Hmm. Userland wants a different default. 577 * Do a fake "first trap" to establish the context, then 578 * handle as if we already had a context before we came in. 
579 */ 580 kpreempt_disable(); 581 fp_seed(); 582 kpreempt_enable(); 583 } 584 585 /* 586 * Ensure that the current hardware state is flushed back to the 587 * pcb, then modify that copy. Next use of the fp will 588 * restore the context. 589 */ 590 fp_save(fp); 591 592 #if defined(__amd64) 593 fx = &fp->fpu_regs.kfpu_u.kfpu_fx; 594 fx->fx_fcw = fcw; 595 fx->fx_mxcsr = sse_mxcsr_mask & mxcsr; 596 #else 597 switch (fp_kind) { 598 case __FP_SSE: 599 fx = &fp->fpu_regs.kfpu_u.kfpu_fx; 600 fx->fx_fcw = fcw; 601 fx->fx_mxcsr = sse_mxcsr_mask & mxcsr; 602 break; 603 default: 604 fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw = fcw; 605 break; 606 } 607 #endif 608 } 609