/*
 * Copyright (C) 1994 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <asm/fpu/internal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/traps.h>

#include <linux/hardirq.h>

/*
 * Represents the initial FPU state. It's mostly (but not completely) zeroes,
 * depending on the FPU hardware format:
 */
union fpregs_state init_fpstate __read_mostly;

/*
 * Track whether the kernel is using the FPU state
 * currently.
 *
 * This flag is used:
 *
 *   - by IRQ context code to potentially use the FPU
 *     if it's unused.
 *
 *   - to debug kernel_fpu_begin()/end() correctness
 */
static DEFINE_PER_CPU(bool, in_kernel_fpu);

/*
 * Track which context is using the FPU on the CPU:
 */
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

static void kernel_fpu_disable(void)
{
        WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
        this_cpu_write(in_kernel_fpu, true);
}

static void kernel_fpu_enable(void)
{
        WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
        this_cpu_write(in_kernel_fpu, false);
}

static bool kernel_fpu_disabled(void)
{
        return this_cpu_read(in_kernel_fpu);
}

/*
 * Were we in an interrupt that interrupted kernel mode?
 *
 * In the lazy-FPU case we can do a kernel_fpu_begin/end() pair *ONLY*
 * if that pair does nothing at all: the thread must not own the fpregs
 * (so that we don't try to save the FPU state), and TS must be set
 * (so that the clts/stts pair does nothing that is visible in the
 * interrupted kernel thread).
 *
 * The eagerfpu case is the exception: there we return true, since in
 * the likely case the thread owns the FPU but we are not going to
 * set/clear TS anyway.
 */
static bool interrupted_kernel_fpu_idle(void)
{
        if (kernel_fpu_disabled())
                return false;

        if (use_eager_fpu())
                return true;

        return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
}

/*
 * Were we in user mode (or vm86 mode) when we were
 * interrupted?
 *
 * Doing kernel_fpu_begin/end() is ok if we are running
 * in an interrupt context from user mode - we'll just
 * save the FPU state as required.
 */
static bool interrupted_user_mode(void)
{
        struct pt_regs *regs = get_irq_regs();

        return regs && user_mode(regs);
}

/*
 * Can we use the FPU in kernel mode with the
 * whole "kernel_fpu_begin/end()" sequence?
 *
 * It's always ok in process context (ie "not interrupt")
 * but it is sometimes ok even from an irq.
 */
bool irq_fpu_usable(void)
{
        return !in_interrupt() ||
                interrupted_user_mode() ||
                interrupted_kernel_fpu_idle();
}
EXPORT_SYMBOL(irq_fpu_usable);

void __kernel_fpu_begin(void)
{
        struct fpu *fpu = &current->thread.fpu;

        WARN_ON_FPU(!irq_fpu_usable());

        kernel_fpu_disable();

        if (fpu->fpregs_active) {
                /*
                 * Ignore return value -- we don't care if reg state
                 * is clobbered.
                 */
                copy_fpregs_to_fpstate(fpu);
        } else {
                this_cpu_write(fpu_fpregs_owner_ctx, NULL);
                __fpregs_activate_hw();
        }
}
EXPORT_SYMBOL(__kernel_fpu_begin);

void __kernel_fpu_end(void)
{
        struct fpu *fpu = &current->thread.fpu;

        if (fpu->fpregs_active)
                copy_kernel_to_fpregs(&fpu->state);
        else
                __fpregs_deactivate_hw();

        kernel_fpu_enable();
}
EXPORT_SYMBOL(__kernel_fpu_end);

void kernel_fpu_begin(void)
{
        preempt_disable();
        __kernel_fpu_begin();
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);

void kernel_fpu_end(void)
{
        __kernel_fpu_end();
        preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
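
/*
 * Usage sketch (illustrative only -- my_xor_blocks_simd() is a made-up
 * helper, not defined anywhere in the tree): kernel code that wants to
 * touch FPU/SIMD registers brackets the work with kernel_fpu_begin()/
 * kernel_fpu_end(), and code that may run in IRQ context first checks
 * irq_fpu_usable() and falls back to an integer implementation when the
 * FPU cannot be used:
 *
 *	static void my_xor_blocks_simd(unsigned long *dst,
 *				       const unsigned long *src, int words)
 *	{
 *		int i;
 *
 *		if (!irq_fpu_usable()) {
 *			for (i = 0; i < words; i++)	/- integer fallback -/
 *				dst[i] ^= src[i];
 *			return;
 *		}
 *
 *		kernel_fpu_begin();	/- also disables preemption -/
 *		/- ... SSE/AVX accelerated XOR of the blocks here ... -/
 *		kernel_fpu_end();
 *	}
 */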

/*
 * CR0::TS save/restore functions:
 */
int irq_ts_save(void)
{
        /*
         * If we are in process context (i.e. not atomic), a spurious DNA
         * fault is harmless, so don't touch CR0.TS here.  Avoiding it by
         * doing clts() in process context would require disabling
         * preemption or some heavy lifting like kernel_fpu_begin().
         */
        if (!in_atomic())
                return 0;

        if (read_cr0() & X86_CR0_TS) {
                clts();
                return 1;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(irq_ts_save);

void irq_ts_restore(int TS_state)
{
        if (TS_state)
                stts();
}
EXPORT_SYMBOL_GPL(irq_ts_restore);
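
/*
 * Usage sketch (illustrative only): a driver that executes an instruction
 * which faults with #NM while CR0.TS is set (for example a CPU-specific
 * crypto/RNG instruction) can temporarily clear TS from atomic context
 * and restore it afterwards:
 *
 *	int ts_state;
 *
 *	ts_state = irq_ts_save();
 *	/- ... execute the TS-sensitive instruction(s) here ... -/
 *	irq_ts_restore(ts_state);
 */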

/*
 * Save the FPU state (mark it for reload if necessary):
 *
 * This only ever gets called for the current task.
 */
void fpu__save(struct fpu *fpu)
{
        WARN_ON_FPU(fpu != &current->thread.fpu);

        preempt_disable();
        if (fpu->fpregs_active) {
                if (!copy_fpregs_to_fpstate(fpu)) {
                        if (use_eager_fpu())
                                copy_kernel_to_fpregs(&fpu->state);
                        else
                                fpregs_deactivate(fpu);
                }
        }
        preempt_enable();
}
EXPORT_SYMBOL_GPL(fpu__save);

/*
 * Legacy x87 fpstate state init:
 */
static inline void fpstate_init_fstate(struct fregs_state *fp)
{
        fp->cwd = 0xffff037fu;          /* FNINIT control word: all exceptions masked */
        fp->swd = 0xffff0000u;          /* status word cleared */
        fp->twd = 0xffffffffu;          /* tag word: all registers empty */
        fp->fos = 0xffff0000u;
}

void fpstate_init(union fpregs_state *state)
{
        if (!cpu_has_fpu) {
                fpstate_init_soft(&state->soft);
                return;
        }

        memset(state, 0, xstate_size);

        if (cpu_has_fxsr)
                fpstate_init_fxstate(&state->fxsave);
        else
                fpstate_init_fstate(&state->fsave);
}
EXPORT_SYMBOL_GPL(fpstate_init);

int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
        dst_fpu->counter = 0;
        dst_fpu->fpregs_active = 0;
        dst_fpu->last_cpu = -1;

        if (!src_fpu->fpstate_active || !cpu_has_fpu)
                return 0;

        WARN_ON_FPU(src_fpu != &current->thread.fpu);

        /*
         * Don't let 'init optimized' areas of the XSAVE area
         * leak into the child task:
         */
        if (use_eager_fpu())
                memset(&dst_fpu->state.xsave, 0, xstate_size);

        /*
         * Save current FPU registers directly into the child
         * FPU context, without any memory-to-memory copying.
         * In lazy mode, if the FPU context isn't loaded into
         * fpregs, CR0.TS will be set and do_device_not_available
         * will load the FPU context.
         *
         * We have to do all this with preemption disabled,
         * mostly because of the FNSAVE case, because in that
         * case we must not allow preemption in the window
         * between the FNSAVE and us marking the context lazy.
         *
         * It shouldn't be an issue as even FNSAVE is plenty
         * fast in terms of critical section length.
         */
        preempt_disable();
        if (!copy_fpregs_to_fpstate(dst_fpu)) {
                memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);

                if (use_eager_fpu())
                        copy_kernel_to_fpregs(&src_fpu->state);
                else
                        fpregs_deactivate(src_fpu);
        }
        preempt_enable();

        return 0;
}
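
/*
 * Usage sketch (illustrative only -- arch_dup_fpu_example() is a made-up
 * wrapper; the real call site lives outside this file, in the fork path):
 * when a task is duplicated, the child's struct fpu is populated from the
 * parent's live registers roughly like this:
 *
 *	int arch_dup_fpu_example(struct task_struct *dst,
 *				 struct task_struct *src)
 *	{
 *		return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
 *	}
 */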

/*
 * Activate the current task's in-memory FPU context,
 * if it has not been used before:
 */
void fpu__activate_curr(struct fpu *fpu)
{
        WARN_ON_FPU(fpu != &current->thread.fpu);

        if (!fpu->fpstate_active) {
                fpstate_init(&fpu->state);

                /* Safe to do for the current task: */
                fpu->fpstate_active = 1;
        }
}
EXPORT_SYMBOL_GPL(fpu__activate_curr);

/*
 * This function must be called before we read a task's fpstate.
 *
 * If the task has not used the FPU before then initialize its
 * fpstate.
 *
 * If the task has used the FPU before then save it.
 */
void fpu__activate_fpstate_read(struct fpu *fpu)
{
        /*
         * If fpregs are active (in the current CPU), then
         * copy them to the fpstate:
         */
        if (fpu->fpregs_active) {
                fpu__save(fpu);
        } else {
                if (!fpu->fpstate_active) {
                        fpstate_init(&fpu->state);

                        /* Safe to do for current and for stopped child tasks: */
                        fpu->fpstate_active = 1;
                }
        }
}

/*
 * This function must be called before we write a task's fpstate.
 *
 * If the task has used the FPU before then unlazy it.
 * If the task has not used the FPU before then initialize its fpstate.
 *
 * After this function call, after registers in the fpstate are
 * modified and the child task has woken up, the child task will
 * restore the modified FPU state from the modified context. If we
 * didn't clear its lazy status here then the lazy in-registers
 * state pending on its former CPU could be restored, corrupting
 * the modifications.
 */
void fpu__activate_fpstate_write(struct fpu *fpu)
{
        /*
         * Only stopped child tasks can be used to modify the FPU
         * state in the fpstate buffer:
         */
        WARN_ON_FPU(fpu == &current->thread.fpu);

        if (fpu->fpstate_active) {
                /* Invalidate any lazy state: */
                fpu->last_cpu = -1;
        } else {
                fpstate_init(&fpu->state);

                /* Safe to do for stopped child tasks: */
                fpu->fpstate_active = 1;
        }
}
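
/*
 * Usage sketch (illustrative only -- debugger_poke_mxcsr() is a made-up
 * helper, not a real regset handler): callers such as the ptrace/regset
 * layer use fpu__activate_fpstate_read() before reading a stopped child's
 * fpstate and fpu__activate_fpstate_write() before modifying it, so that
 * the in-memory image is valid and any lazy in-register state is
 * invalidated before the change:
 *
 *	static void debugger_poke_mxcsr(struct task_struct *child, u32 mxcsr)
 *	{
 *		struct fpu *fpu = &child->thread.fpu;
 *
 *		fpu__activate_fpstate_write(fpu);
 *		fpu->state.fxsave.mxcsr = mxcsr;
 *		/- the child restores the modified state when it resumes -/
 *	}
 */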

/*
 * 'fpu__restore()' is called to copy FPU registers from
 * the FPU fpstate to the live hw registers and to activate
 * access to the hardware registers, so that FPU instructions
 * can be used afterwards.
 *
 * Must be called with kernel preemption disabled (for example
 * with local interrupts disabled, as it is in the case of
 * do_device_not_available()).
 */
void fpu__restore(struct fpu *fpu)
{
        fpu__activate_curr(fpu);

        /* Avoid __kernel_fpu_begin() right after fpregs_activate() */
        kernel_fpu_disable();
        fpregs_activate(fpu);
        copy_kernel_to_fpregs(&fpu->state);
        fpu->counter++;
        kernel_fpu_enable();
}
EXPORT_SYMBOL_GPL(fpu__restore);

/*
 * Drops current FPU state: deactivates the fpregs and
 * the fpstate. NOTE: it still leaves previous contents
 * in the fpregs in the eager-FPU case.
 *
 * This function can be used in cases where we know that
 * a state-restore is coming: either an explicit one,
 * or a reschedule.
 */
void fpu__drop(struct fpu *fpu)
{
        preempt_disable();
        fpu->counter = 0;

        if (fpu->fpregs_active) {
                /* Ignore delayed exceptions from user space */
                asm volatile("1: fwait\n"
                             "2:\n"
                             _ASM_EXTABLE(1b, 2b));
                fpregs_deactivate(fpu);
        }

        fpu->fpstate_active = 0;

        preempt_enable();
}

/*
 * Clear FPU registers by setting them up from
 * the init fpstate:
 */
static inline void copy_init_fpstate_to_fpregs(void)
{
        if (use_xsave())
                copy_kernel_to_xregs(&init_fpstate.xsave, -1);
        else if (static_cpu_has(X86_FEATURE_FXSR))
                copy_kernel_to_fxregs(&init_fpstate.fxsave);
        else
                copy_kernel_to_fregs(&init_fpstate.fsave);
}

/*
 * Clear the FPU state back to init state.
 *
 * Called by sys_execve(), by the signal handler code and by various
 * error paths.
 */
void fpu__clear(struct fpu *fpu)
{
        WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */

        if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
                /* FPU state will be reinitialized lazily at the first use. */
                fpu__drop(fpu);
        } else {
                if (!fpu->fpstate_active) {
                        fpu__activate_curr(fpu);
                        user_fpu_begin();
                }
                copy_init_fpstate_to_fpregs();
        }
}

/*
 * x87 math exception handling:
 */

static inline unsigned short get_fpu_cwd(struct fpu *fpu)
{
        if (cpu_has_fxsr) {
                return fpu->state.fxsave.cwd;
        } else {
                return (unsigned short)fpu->state.fsave.cwd;
        }
}

static inline unsigned short get_fpu_swd(struct fpu *fpu)
{
        if (cpu_has_fxsr) {
                return fpu->state.fxsave.swd;
        } else {
                return (unsigned short)fpu->state.fsave.swd;
        }
}

static inline unsigned short get_fpu_mxcsr(struct fpu *fpu)
{
        if (cpu_has_xmm) {
                return fpu->state.fxsave.mxcsr;
        } else {
                return MXCSR_DEFAULT;
        }
}

int fpu__exception_code(struct fpu *fpu, int trap_nr)
{
        int err;

        if (trap_nr == X86_TRAP_MF) {
                unsigned short cwd, swd;
                /*
                 * (~cwd & swd) will mask out exceptions that are not set to unmasked
                 * status.  0x3f is the exception bits in these regs, 0x200 is the
                 * C1 bit you need in case of a stack fault, 0x040 is the stack
                 * fault bit.  We should only be taking one exception at a time,
                 * so if this combination doesn't produce any single exception,
                 * then we have a bad program that isn't synchronizing its FPU usage
                 * and it will suffer the consequences since we won't be able to
                 * fully reproduce the context of the exception.
                 */
                cwd = get_fpu_cwd(fpu);
                swd = get_fpu_swd(fpu);

                err = swd & ~cwd;
        } else {
                /*
                 * The SIMD FPU exceptions are handled a little differently, as there
                 * is only a single status/control register.  Thus, to determine which
                 * unmasked exception was caught we must mask the exception mask bits
                 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
                 */
                unsigned short mxcsr = get_fpu_mxcsr(fpu);
                err = ~(mxcsr >> 7) & mxcsr;
        }

        if (err & 0x001) {      /* Invalid op */
                /*
                 * swd & 0x240 == 0x040: Stack Underflow
                 * swd & 0x240 == 0x240: Stack Overflow
                 * User must clear the SF bit (0x40) if set
                 */
                return FPE_FLTINV;
        } else if (err & 0x004) { /* Divide by Zero */
                return FPE_FLTDIV;
        } else if (err & 0x008) { /* Overflow */
                return FPE_FLTOVF;
        } else if (err & 0x012) { /* Denormal, Underflow */
                return FPE_FLTUND;
        } else if (err & 0x020) { /* Precision */
                return FPE_FLTRES;
        }

        /*
         * If we're using IRQ 13, or supposedly even some trap
         * X86_TRAP_MF implementations, it's possible
         * we get a spurious trap, which is not an error.
         */
        return 0;
}
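
/*
 * Worked example for the SIMD path above (illustrative numbers only):
 * with the divide-by-zero exception unmasked, MXCSR is 0x1d80 (the
 * default 0x1f80 with the ZM mask bit, bit 9, cleared).  After a SIMD
 * divide by zero the ZE flag (bit 2) gets set, so mxcsr == 0x1d84 and:
 *
 *	mxcsr >> 7            == 0x3b   (mask bits shifted down to the flag positions)
 *	~(mxcsr >> 7) & mxcsr == 0x1d84, which has bit 2 (0x004) set
 *
 * so fpu__exception_code() returns FPE_FLTDIV.  The x87 path works the
 * same way: err = swd & ~cwd keeps only those status-word flag bits whose
 * exceptions are unmasked in the control word.
 */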