1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ 29 /* All Rights Reserved */ 30 31 /* Copyright (c) 1987, 1988 Microsoft Corporation */ 32 /* All Rights Reserved */ 33 34 #pragma ident "%Z%%M% %I% %E% SMI" 35 36 #include <sys/types.h> 37 #include <sys/param.h> 38 #include <sys/signal.h> 39 #include <sys/regset.h> 40 #include <sys/privregs.h> 41 #include <sys/psw.h> 42 #include <sys/trap.h> 43 #include <sys/fault.h> 44 #include <sys/systm.h> 45 #include <sys/user.h> 46 #include <sys/file.h> 47 #include <sys/proc.h> 48 #include <sys/pcb.h> 49 #include <sys/lwp.h> 50 #include <sys/cpuvar.h> 51 #include <sys/thread.h> 52 #include <sys/disp.h> 53 #include <sys/fp.h> 54 #include <sys/siginfo.h> 55 #include <sys/archsystm.h> 56 #include <sys/kmem.h> 57 #include <sys/debug.h> 58 #include <sys/x86_archext.h> 59 #include <sys/sysmacros.h> 60 61 /*CSTYLED*/ 62 #pragma align 16 (sse_initial) 63 64 /* 65 * Initial kfpu state for SSE/SSE2 used by fpinit() 66 */ 67 const struct fxsave_state sse_initial = { 68 FPU_CW_INIT, /* fx_fcw */ 69 0, /* fx_fsw */ 70 0, /* fx_fctw */ 71 0, /* fx_fop */ 72 #if defined(__amd64) 73 0, /* fx_rip */ 74 0, /* fx_rdp */ 75 #else 76 0, /* fx_eip */ 77 0, /* fx_cs */ 78 0, /* __fx_ign0 */ 79 0, /* fx_dp */ 80 0, /* fx_ds */ 81 0, /* __fx_ign1 */ 82 #endif /* __amd64 */ 83 SSE_MXCSR_INIT /* fx_mxcsr */ 84 /* rest of structure is zero */ 85 }; 86 87 /* 88 * mxcsr_mask value (possibly reset in fpu_probe); used to avoid 89 * the #gp exception caused by setting unsupported bits in the 90 * MXCSR register 91 */ 92 uint32_t sse_mxcsr_mask = SSE_MXCSR_MASK_DEFAULT; 93 94 /* 95 * Initial kfpu state for x87 used by fpinit() 96 */ 97 const struct fnsave_state x87_initial = { 98 FPU_CW_INIT, /* f_fcw */ 99 0, /* __f_ign0 */ 100 0, /* f_fsw */ 101 0, /* __f_ign1 */ 102 0xffff, /* f_ftw */ 103 /* rest of structure is zero */ 104 }; 105 106 #if defined(__amd64) 107 #define fpsave_begin fpxsave_begin 108 #elif defined(__i386) 109 /* 110 * This vector is patched to fpxsave_begin() if we discover 111 * we have an SSE-capable chip in fpu_probe(). 112 */ 113 void (*fpsave_begin)(void *) = fpnsave_begin; 114 #endif 115 116 static int fpe_sicode(uint_t); 117 static int fpe_simd_sicode(uint_t); 118 119 /* 120 * Copy the state of parent lwp's floating point context into the new lwp. 121 * Invoked for both fork() and lwp_create(). 122 * 123 * Note that we inherit -only- the control state (e.g. exception masks, 124 * rounding, precision control, etc.); the FPU registers are otherwise 125 * reset to their initial state. 126 */ 127 static void 128 fp_new_lwp(kthread_id_t t, kthread_id_t ct) 129 { 130 struct fpu_ctx *fp; /* parent fpu context */ 131 struct fpu_ctx *cfp; /* new fpu context */ 132 struct fxsave_state *fx, *cfx; 133 134 ASSERT(fp_kind != FP_NO); 135 136 fp = &t->t_lwp->lwp_pcb.pcb_fpu; 137 cfp = &ct->t_lwp->lwp_pcb.pcb_fpu; 138 139 /* 140 * If the parent FPU state is still in the FPU hw then save it; 141 * conveniently, fp_save() already does this for us nicely. 142 */ 143 fp_save(fp); 144 145 cfp->fpu_flags = FPU_EN | FPU_VALID; 146 cfp->fpu_regs.kfpu_status = 0; 147 cfp->fpu_regs.kfpu_xstatus = 0; 148 149 #if defined(__amd64) 150 fx = &fp->fpu_regs.kfpu_u.kfpu_fx; 151 cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx; 152 bcopy(&sse_initial, cfx, sizeof (*cfx)); 153 cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS; 154 cfx->fx_fcw = fx->fx_fcw; 155 #else 156 if (fp_kind == __FP_SSE) { 157 fx = &fp->fpu_regs.kfpu_u.kfpu_fx; 158 cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx; 159 bcopy(&sse_initial, cfx, sizeof (*cfx)); 160 cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS; 161 cfx->fx_fcw = fx->fx_fcw; 162 } else { 163 struct fnsave_state *fn = &fp->fpu_regs.kfpu_u.kfpu_fn; 164 struct fnsave_state *cfn = &cfp->fpu_regs.kfpu_u.kfpu_fn; 165 166 bcopy(&x87_initial, cfn, sizeof (*cfn)); 167 cfn->f_fcw = fn->f_fcw; 168 } 169 #endif 170 installctx(ct, cfp, 171 fpsave_begin, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free); 172 /* 173 * Now, when the new lwp starts running, it will take a trap 174 * that will be handled inline in the trap table to cause 175 * the appropriate f*rstor instruction to load the save area we 176 * constructed above directly into the hardware. 177 */ 178 } 179 180 /* 181 * Free any state associated with floating point context. 182 * Fp_free can be called in three cases: 183 * 1) from reaper -> thread_free -> ctxfree -> fp_free 184 * fp context belongs to a thread on deathrow 185 * nothing to do, thread will never be resumed 186 * thread calling ctxfree is reaper 187 * 188 * 2) from exec -> ctxfree -> fp_free 189 * fp context belongs to the current thread 190 * must disable fpu, thread calling ctxfree is curthread 191 * 192 * 3) from restorecontext -> setfpregs -> fp_free 193 * we have a modified context in the memory (lwp->pcb_fpu) 194 * disable fpu and release the fp context for the CPU 195 * 196 */ 197 /*ARGSUSED*/ 198 void 199 fp_free(struct fpu_ctx *fp, int isexec) 200 { 201 ASSERT(fp_kind != FP_NO); 202 203 if (fp->fpu_flags & FPU_VALID) 204 return; 205 206 kpreempt_disable(); 207 /* 208 * We want to do fpsave rather than fpdisable so that we can 209 * keep the fpu_flags as FPU_VALID tracking the CR0_TS bit 210 */ 211 fp->fpu_flags |= FPU_VALID; 212 /* If for current thread disable FP to track FPU_VALID */ 213 if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) { 214 /* Clear errors if any to prevent frstor from complaining */ 215 (void) fperr_reset(); 216 if (fp_kind == __FP_SSE) 217 (void) fpxerr_reset(); 218 fpdisable(); 219 } 220 kpreempt_enable(); 221 } 222 223 /* 224 * Store the floating point state and disable the floating point unit. 225 */ 226 void 227 fp_save(struct fpu_ctx *fp) 228 { 229 ASSERT(fp_kind != FP_NO); 230 231 kpreempt_disable(); 232 if (!fp || fp->fpu_flags & FPU_VALID) { 233 kpreempt_enable(); 234 return; 235 } 236 ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu); 237 238 #if defined(__amd64) 239 fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx); 240 #else 241 switch (fp_kind) { 242 case __FP_SSE: 243 fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx); 244 break; 245 default: 246 fpsave(&fp->fpu_regs.kfpu_u.kfpu_fn); 247 break; 248 } 249 #endif 250 fp->fpu_flags |= FPU_VALID; 251 kpreempt_enable(); 252 } 253 254 /* 255 * Restore the FPU context for the thread: 256 * The possibilities are: 257 * 1. No active FPU context: Load the new context into the FPU hw 258 * and enable the FPU. 259 */ 260 void 261 fp_restore(struct fpu_ctx *fp) 262 { 263 #if defined(__amd64) 264 fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx); 265 #else 266 /* case 2 */ 267 if (fp_kind == __FP_SSE) 268 fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx); 269 else 270 fprestore(&fp->fpu_regs.kfpu_u.kfpu_fn); 271 #endif 272 fp->fpu_flags &= ~FPU_VALID; 273 } 274 275 276 /* 277 * Seeds the initial state for the current thread. The possibilities are: 278 * 1. Another process has modified the FPU state before we have done any 279 * initialization: Load the FPU state from the LWP state. 280 * 2. The FPU state has not been externally modified: Load a clean state. 281 */ 282 static void 283 fp_seed(void) 284 { 285 struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; 286 287 ASSERT(curthread->t_preempt >= 1); 288 ASSERT((fp->fpu_flags & FPU_EN) == 0); 289 290 /* 291 * Always initialize a new context and initialize the hardware. 292 */ 293 installctx(curthread, fp, 294 fpsave_begin, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free); 295 fpinit(); 296 297 /* 298 * If FPU_VALID is set, it means someone has modified registers via 299 * /proc. In this case, restore the current lwp's state. 300 */ 301 if (fp->fpu_flags & FPU_VALID) 302 fp_restore(fp); 303 304 ASSERT((fp->fpu_flags & FPU_VALID) == 0); 305 fp->fpu_flags = FPU_EN; 306 } 307 308 /* 309 * This routine is called from trap() when User thread takes No Extension 310 * Fault. The possiblities are: 311 * 1. User thread has executed a FP instruction for the first time. 312 * Save current FPU context if any. Initialize FPU, setup FPU 313 * context for the thread and enable FP hw. 314 * 2. Thread's pcb has a valid FPU state: Restore the FPU state and 315 * enable FP hw. 316 * 317 * Note that case #2 is inlined in the trap table. 318 */ 319 int 320 fpnoextflt(struct regs *rp) 321 { 322 struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; 323 324 #if !defined(__lint) 325 ASSERT(sizeof (struct fxsave_state) == 512 && 326 sizeof (struct fnsave_state) == 108); 327 ASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0); 328 #if defined(__i386) 329 ASSERT(sizeof (struct fpu) == sizeof (struct __old_fpu)); 330 #endif /* __i386 */ 331 #endif /* !__lint */ 332 333 /* 334 * save area MUST be 16-byte aligned, else will page fault 335 */ 336 ASSERT(((uintptr_t)(&fp->fpu_regs.kfpu_u.kfpu_fx) & 0xf) == 0); 337 338 kpreempt_disable(); 339 /* 340 * Now we can enable the interrupts. 341 * (NOTE: fp-no-coprocessor comes thru interrupt gate) 342 */ 343 sti(); 344 345 if (!fpu_exists) { /* check for FPU hw exists */ 346 if (fp_kind == FP_NO) { 347 uint32_t inst; 348 349 /* 350 * When the system has no floating point support, 351 * i.e. no FP hardware and no emulator, skip the 352 * two kinds of FP instruction that occur in 353 * fpstart. Allows processes that do no real FP 354 * to run normally. 355 */ 356 if (fuword32((void *)rp->r_pc, &inst) != -1 && 357 ((inst & 0xFFFF) == 0x7dd9 || 358 (inst & 0xFFFF) == 0x6dd9)) { 359 rp->r_pc += 3; 360 kpreempt_enable(); 361 return (0); 362 } 363 } 364 365 /* 366 * If we have neither a processor extension nor 367 * an emulator, kill the process OR panic the kernel. 368 */ 369 kpreempt_enable(); 370 return (1); /* error */ 371 } 372 373 /* 374 * A paranoid cross-check: for the SSE case, ensure that %cr4 is 375 * configured to enable fully fledged (%xmm) fxsave/fxrestor on 376 * this CPU. For the non-SSE case, ensure that it isn't. 377 */ 378 ASSERT((fp_kind == __FP_SSE && (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) || 379 (fp_kind != __FP_SSE && 380 (getcr4() & (CR4_OSXMMEXCPT|CR4_OSFXSR)) == 0)); 381 382 if (fp->fpu_flags & FPU_EN) { 383 /* case 2 */ 384 fp_restore(fp); 385 } else { 386 /* case 1 */ 387 fp_seed(); 388 } 389 kpreempt_enable(); 390 return (0); 391 } 392 393 394 /* 395 * Handle a processor extension overrun fault 396 * Returns non zero for error. 397 */ 398 399 /* ARGSUSED */ 400 int 401 fpextovrflt(struct regs *rp) 402 { 403 ulong_t cur_cr0; 404 405 ASSERT(fp_kind != FP_NO); 406 407 cur_cr0 = getcr0(); 408 fpinit(); /* initialize the FPU hardware */ 409 setcr0(cur_cr0); 410 sti(); 411 412 return (1); /* error, send SIGSEGV signal to the thread */ 413 } 414 415 /* 416 * Handle a processor extension error fault 417 * Returns non zero for error. 418 */ 419 420 /*ARGSUSED*/ 421 int 422 fpexterrflt(struct regs *rp) 423 { 424 uint32_t fpcwsw; 425 fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; 426 427 ASSERT(fp_kind != FP_NO); 428 429 fpcwsw = fpgetcwsw(); 430 /* 431 * Now we can enable the interrupts. 432 * (NOTE: x87 fp exceptions come thru interrupt gate) 433 */ 434 sti(); 435 436 if ((fpcwsw & FPS_ES) == 0) 437 return (0); /* No exception */ 438 439 if (fpu_exists) { 440 fp_save(fp); 441 /* clear exception flags in saved state, as if by fnclex */ 442 #if defined(__amd64) 443 fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS; 444 #else 445 switch (fp_kind) { 446 case __FP_SSE: 447 fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS; 448 break; 449 default: 450 fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw &= ~FPS_SW_EFLAGS; 451 break; 452 } 453 #endif 454 } 455 fp->fpu_regs.kfpu_status = fpcwsw & 0xffff; 456 /* 457 * "and" the exception flags with the complement of the mask 458 * bits to determine which exception occurred 459 */ 460 return (fpe_sicode(fpcwsw & ~(fpcwsw >> 16) & 0x3f)); 461 } 462 463 /* 464 * Handle an SSE/SSE2 precise exception. 465 * Returns a non-zero sicode for error. 466 */ 467 /*ARGSUSED*/ 468 int 469 fpsimderrflt(struct regs *rp) 470 { 471 uint32_t mxcsr, xmask; 472 fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu; 473 474 ASSERT(fp_kind == __FP_SSE); 475 476 mxcsr = fpgetmxcsr(); 477 if (fpu_exists) { 478 fp_save(fp); /* save the FPU state */ 479 fp->fpu_regs.kfpu_status = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw; 480 } else { 481 fp->fpu_regs.kfpu_status = fpgetcwsw() & 0xffff; 482 } 483 fp->fpu_regs.kfpu_xstatus = mxcsr; 484 485 /* 486 * compute the mask that determines which conditions can cause 487 * a #xm exception, and use this to clean the status bits so that 488 * we can identify the true cause of this one. 489 */ 490 xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS; 491 return (fpe_simd_sicode((mxcsr & SSE_MXCSR_EFLAGS) & ~xmask)); 492 } 493 494 /* 495 * In the unlikely event that someone is relying on this subcode being 496 * FPE_FLTILL for denormalize exceptions, it can always be patched back 497 * again to restore old behaviour. 498 */ 499 int fpe_fltden = FPE_FLTDEN; 500 501 /* 502 * Map from the FPU status word to the FP exception si_code. 503 */ 504 static int 505 fpe_sicode(uint_t sw) 506 { 507 if (sw & FPS_IE) 508 return (FPE_FLTINV); 509 if (sw & FPS_ZE) 510 return (FPE_FLTDIV); 511 if (sw & FPS_DE) 512 return (fpe_fltden); 513 if (sw & FPS_OE) 514 return (FPE_FLTOVF); 515 if (sw & FPS_UE) 516 return (FPE_FLTUND); 517 if (sw & FPS_PE) 518 return (FPE_FLTRES); 519 return (FPE_FLTINV); /* default si_code for other exceptions */ 520 } 521 522 /* 523 * Map from the SSE status word to the FP exception si_code. 524 */ 525 static int 526 fpe_simd_sicode(uint_t sw) 527 { 528 if (sw & SSE_IE) 529 return (FPE_FLTINV); 530 if (sw & SSE_ZE) 531 return (FPE_FLTDIV); 532 if (sw & SSE_DE) 533 return (FPE_FLTDEN); 534 if (sw & SSE_OE) 535 return (FPE_FLTOVF); 536 if (sw & SSE_UE) 537 return (FPE_FLTUND); 538 if (sw & SSE_PE) 539 return (FPE_FLTRES); 540 return (FPE_FLTINV); /* default si_code for other exceptions */ 541 } 542 543 /* 544 * This routine is invoked as part of libc's __fpstart implementation 545 * via sysi86(2). 546 * 547 * It may be called -before- any context has been assigned in which case 548 * we try and avoid touching the hardware. Or it may be invoked well 549 * after the context has been assigned and fiddled with, in which case 550 * just tweak it directly. 551 */ 552 void 553 fpsetcw(uint16_t fcw, uint32_t mxcsr) 554 { 555 struct fpu_ctx *fp = &curthread->t_lwp->lwp_pcb.pcb_fpu; 556 struct fxsave_state *fx; 557 558 if (!fpu_exists || fp_kind == FP_NO) 559 return; 560 561 if ((fp->fpu_flags & FPU_EN) == 0) { 562 if (fcw == FPU_CW_INIT && mxcsr == SSE_MXCSR_INIT) { 563 /* 564 * Common case. Floating point unit not yet 565 * enabled, and kernel already intends to initialize 566 * the hardware the way the caller wants. 567 */ 568 return; 569 } 570 /* 571 * Hmm. Userland wants a different default. 572 * Do a fake "first trap" to establish the context, then 573 * handle as if we already had a context before we came in. 574 */ 575 kpreempt_disable(); 576 fp_seed(); 577 kpreempt_enable(); 578 } 579 580 /* 581 * Ensure that the current hardware state is flushed back to the 582 * pcb, then modify that copy. Next use of the fp will 583 * restore the context. 584 */ 585 fp_save(fp); 586 587 #if defined(__amd64) 588 fx = &fp->fpu_regs.kfpu_u.kfpu_fx; 589 fx->fx_fcw = fcw; 590 fx->fx_mxcsr = sse_mxcsr_mask & mxcsr; 591 #else 592 switch (fp_kind) { 593 case __FP_SSE: 594 fx = &fp->fpu_regs.kfpu_u.kfpu_fx; 595 fx->fx_fcw = fcw; 596 fx->fx_mxcsr = sse_mxcsr_mask & mxcsr; 597 break; 598 default: 599 fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw = fcw; 600 break; 601 } 602 #endif 603 } 604