1 /* 2 * Copyright (C) 1994 Linus Torvalds 3 * 4 * Pentium III FXSR, SSE support 5 * General FPU state handling cleanups 6 * Gareth Hughes <gareth@valinux.com>, May 2000 7 */ 8 #include <asm/fpu/internal.h> 9 #include <asm/fpu/regset.h> 10 #include <asm/fpu/signal.h> 11 #include <asm/traps.h> 12 13 #include <linux/hardirq.h> 14 15 /* 16 * Represents the initial FPU state. It's mostly (but not completely) zeroes, 17 * depending on the FPU hardware format: 18 */ 19 union fpregs_state init_fpstate __read_mostly; 20 21 /* 22 * Track whether the kernel is using the FPU state 23 * currently. 24 * 25 * This flag is used: 26 * 27 * - by IRQ context code to potentially use the FPU 28 * if it's unused. 29 * 30 * - to debug kernel_fpu_begin()/end() correctness 31 */ 32 static DEFINE_PER_CPU(bool, in_kernel_fpu); 33 34 /* 35 * Track which context is using the FPU on the CPU: 36 */ 37 DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); 38 39 static void kernel_fpu_disable(void) 40 { 41 WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); 42 this_cpu_write(in_kernel_fpu, true); 43 } 44 45 static void kernel_fpu_enable(void) 46 { 47 WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); 48 this_cpu_write(in_kernel_fpu, false); 49 } 50 51 static bool kernel_fpu_disabled(void) 52 { 53 return this_cpu_read(in_kernel_fpu); 54 } 55 56 /* 57 * Were we in an interrupt that interrupted kernel mode? 58 * 59 * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that 60 * pair does nothing at all: the thread must not have fpu (so 61 * that we don't try to save the FPU state), and TS must 62 * be set (so that the clts/stts pair does nothing that is 63 * visible in the interrupted kernel thread). 64 * 65 * Except for the eagerfpu case when we return true; in the likely case 66 * the thread has FPU but we are not going to set/clear TS. 67 */ 68 static bool interrupted_kernel_fpu_idle(void) 69 { 70 if (kernel_fpu_disabled()) 71 return false; 72 73 if (use_eager_fpu()) 74 return true; 75 76 return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS); 77 } 78 79 /* 80 * Were we in user mode (or vm86 mode) when we were 81 * interrupted? 82 * 83 * Doing kernel_fpu_begin/end() is ok if we are running 84 * in an interrupt context from user mode - we'll just 85 * save the FPU state as required. 86 */ 87 static bool interrupted_user_mode(void) 88 { 89 struct pt_regs *regs = get_irq_regs(); 90 return regs && user_mode(regs); 91 } 92 93 /* 94 * Can we use the FPU in kernel mode with the 95 * whole "kernel_fpu_begin/end()" sequence? 96 * 97 * It's always ok in process context (ie "not interrupt") 98 * but it is sometimes ok even from an irq. 99 */ 100 bool irq_fpu_usable(void) 101 { 102 return !in_interrupt() || 103 interrupted_user_mode() || 104 interrupted_kernel_fpu_idle(); 105 } 106 EXPORT_SYMBOL(irq_fpu_usable); 107 108 void __kernel_fpu_begin(void) 109 { 110 struct fpu *fpu = ¤t->thread.fpu; 111 112 WARN_ON_FPU(!irq_fpu_usable()); 113 114 kernel_fpu_disable(); 115 116 if (fpu->fpregs_active) { 117 copy_fpregs_to_fpstate(fpu); 118 } else { 119 this_cpu_write(fpu_fpregs_owner_ctx, NULL); 120 __fpregs_activate_hw(); 121 } 122 } 123 EXPORT_SYMBOL(__kernel_fpu_begin); 124 125 void __kernel_fpu_end(void) 126 { 127 struct fpu *fpu = ¤t->thread.fpu; 128 129 if (fpu->fpregs_active) 130 copy_kernel_to_fpregs(&fpu->state); 131 else 132 __fpregs_deactivate_hw(); 133 134 kernel_fpu_enable(); 135 } 136 EXPORT_SYMBOL(__kernel_fpu_end); 137 138 void kernel_fpu_begin(void) 139 { 140 preempt_disable(); 141 __kernel_fpu_begin(); 142 } 143 EXPORT_SYMBOL_GPL(kernel_fpu_begin); 144 145 void kernel_fpu_end(void) 146 { 147 __kernel_fpu_end(); 148 preempt_enable(); 149 } 150 EXPORT_SYMBOL_GPL(kernel_fpu_end); 151 152 /* 153 * CR0::TS save/restore functions: 154 */ 155 int irq_ts_save(void) 156 { 157 /* 158 * If in process context and not atomic, we can take a spurious DNA fault. 159 * Otherwise, doing clts() in process context requires disabling preemption 160 * or some heavy lifting like kernel_fpu_begin() 161 */ 162 if (!in_atomic()) 163 return 0; 164 165 if (read_cr0() & X86_CR0_TS) { 166 clts(); 167 return 1; 168 } 169 170 return 0; 171 } 172 EXPORT_SYMBOL_GPL(irq_ts_save); 173 174 void irq_ts_restore(int TS_state) 175 { 176 if (TS_state) 177 stts(); 178 } 179 EXPORT_SYMBOL_GPL(irq_ts_restore); 180 181 /* 182 * Save the FPU state (mark it for reload if necessary): 183 * 184 * This only ever gets called for the current task. 185 */ 186 void fpu__save(struct fpu *fpu) 187 { 188 WARN_ON_FPU(fpu != ¤t->thread.fpu); 189 190 preempt_disable(); 191 if (fpu->fpregs_active) { 192 if (!copy_fpregs_to_fpstate(fpu)) 193 fpregs_deactivate(fpu); 194 } 195 preempt_enable(); 196 } 197 EXPORT_SYMBOL_GPL(fpu__save); 198 199 /* 200 * Legacy x87 fpstate state init: 201 */ 202 static inline void fpstate_init_fstate(struct fregs_state *fp) 203 { 204 fp->cwd = 0xffff037fu; 205 fp->swd = 0xffff0000u; 206 fp->twd = 0xffffffffu; 207 fp->fos = 0xffff0000u; 208 } 209 210 void fpstate_init(union fpregs_state *state) 211 { 212 if (!cpu_has_fpu) { 213 fpstate_init_soft(&state->soft); 214 return; 215 } 216 217 memset(state, 0, xstate_size); 218 219 if (cpu_has_fxsr) 220 fpstate_init_fxstate(&state->fxsave); 221 else 222 fpstate_init_fstate(&state->fsave); 223 } 224 EXPORT_SYMBOL_GPL(fpstate_init); 225 226 /* 227 * Copy the current task's FPU state to a new task's FPU context. 228 * 229 * In both the 'eager' and the 'lazy' case we save hardware registers 230 * directly to the destination buffer. 231 */ 232 static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) 233 { 234 WARN_ON_FPU(src_fpu != ¤t->thread.fpu); 235 236 /* 237 * Don't let 'init optimized' areas of the XSAVE area 238 * leak into the child task: 239 */ 240 if (use_eager_fpu()) 241 memset(&dst_fpu->state.xsave, 0, xstate_size); 242 243 /* 244 * Save current FPU registers directly into the child 245 * FPU context, without any memory-to-memory copying. 246 * 247 * If the FPU context got destroyed in the process (FNSAVE 248 * done on old CPUs) then copy it back into the source 249 * context and mark the current task for lazy restore. 250 * 251 * We have to do all this with preemption disabled, 252 * mostly because of the FNSAVE case, because in that 253 * case we must not allow preemption in the window 254 * between the FNSAVE and us marking the context lazy. 255 * 256 * It shouldn't be an issue as even FNSAVE is plenty 257 * fast in terms of critical section length. 258 */ 259 preempt_disable(); 260 if (!copy_fpregs_to_fpstate(dst_fpu)) { 261 memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); 262 fpregs_deactivate(src_fpu); 263 } 264 preempt_enable(); 265 } 266 267 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) 268 { 269 dst_fpu->counter = 0; 270 dst_fpu->fpregs_active = 0; 271 dst_fpu->last_cpu = -1; 272 273 if (src_fpu->fpstate_active) 274 fpu_copy(dst_fpu, src_fpu); 275 276 return 0; 277 } 278 279 /* 280 * Activate the current task's in-memory FPU context, 281 * if it has not been used before: 282 */ 283 void fpu__activate_curr(struct fpu *fpu) 284 { 285 WARN_ON_FPU(fpu != ¤t->thread.fpu); 286 287 if (!fpu->fpstate_active) { 288 fpstate_init(&fpu->state); 289 290 /* Safe to do for the current task: */ 291 fpu->fpstate_active = 1; 292 } 293 } 294 EXPORT_SYMBOL_GPL(fpu__activate_curr); 295 296 /* 297 * This function must be called before we read a task's fpstate. 298 * 299 * If the task has not used the FPU before then initialize its 300 * fpstate. 301 * 302 * If the task has used the FPU before then save it. 303 */ 304 void fpu__activate_fpstate_read(struct fpu *fpu) 305 { 306 /* 307 * If fpregs are active (in the current CPU), then 308 * copy them to the fpstate: 309 */ 310 if (fpu->fpregs_active) { 311 fpu__save(fpu); 312 } else { 313 if (!fpu->fpstate_active) { 314 fpstate_init(&fpu->state); 315 316 /* Safe to do for current and for stopped child tasks: */ 317 fpu->fpstate_active = 1; 318 } 319 } 320 } 321 322 /* 323 * This function must be called before we write a task's fpstate. 324 * 325 * If the task has used the FPU before then unlazy it. 326 * If the task has not used the FPU before then initialize its fpstate. 327 * 328 * After this function call, after registers in the fpstate are 329 * modified and the child task has woken up, the child task will 330 * restore the modified FPU state from the modified context. If we 331 * didn't clear its lazy status here then the lazy in-registers 332 * state pending on its former CPU could be restored, corrupting 333 * the modifications. 334 */ 335 void fpu__activate_fpstate_write(struct fpu *fpu) 336 { 337 /* 338 * Only stopped child tasks can be used to modify the FPU 339 * state in the fpstate buffer: 340 */ 341 WARN_ON_FPU(fpu == ¤t->thread.fpu); 342 343 if (fpu->fpstate_active) { 344 /* Invalidate any lazy state: */ 345 fpu->last_cpu = -1; 346 } else { 347 fpstate_init(&fpu->state); 348 349 /* Safe to do for stopped child tasks: */ 350 fpu->fpstate_active = 1; 351 } 352 } 353 354 /* 355 * 'fpu__restore()' is called to copy FPU registers from 356 * the FPU fpstate to the live hw registers and to activate 357 * access to the hardware registers, so that FPU instructions 358 * can be used afterwards. 359 * 360 * Must be called with kernel preemption disabled (for example 361 * with local interrupts disabled, as it is in the case of 362 * do_device_not_available()). 363 */ 364 void fpu__restore(struct fpu *fpu) 365 { 366 fpu__activate_curr(fpu); 367 368 /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ 369 kernel_fpu_disable(); 370 fpregs_activate(fpu); 371 copy_kernel_to_fpregs(&fpu->state); 372 fpu->counter++; 373 kernel_fpu_enable(); 374 } 375 EXPORT_SYMBOL_GPL(fpu__restore); 376 377 /* 378 * Drops current FPU state: deactivates the fpregs and 379 * the fpstate. NOTE: it still leaves previous contents 380 * in the fpregs in the eager-FPU case. 381 * 382 * This function can be used in cases where we know that 383 * a state-restore is coming: either an explicit one, 384 * or a reschedule. 385 */ 386 void fpu__drop(struct fpu *fpu) 387 { 388 preempt_disable(); 389 fpu->counter = 0; 390 391 if (fpu->fpregs_active) { 392 /* Ignore delayed exceptions from user space */ 393 asm volatile("1: fwait\n" 394 "2:\n" 395 _ASM_EXTABLE(1b, 2b)); 396 fpregs_deactivate(fpu); 397 } 398 399 fpu->fpstate_active = 0; 400 401 preempt_enable(); 402 } 403 404 /* 405 * Clear FPU registers by setting them up from 406 * the init fpstate: 407 */ 408 static inline void copy_init_fpstate_to_fpregs(void) 409 { 410 if (use_xsave()) 411 copy_kernel_to_xregs(&init_fpstate.xsave, -1); 412 else 413 copy_kernel_to_fxregs(&init_fpstate.fxsave); 414 } 415 416 /* 417 * Clear the FPU state back to init state. 418 * 419 * Called by sys_execve(), by the signal handler code and by various 420 * error paths. 421 */ 422 void fpu__clear(struct fpu *fpu) 423 { 424 WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ 425 426 if (!use_eager_fpu()) { 427 /* FPU state will be reallocated lazily at the first use. */ 428 fpu__drop(fpu); 429 } else { 430 if (!fpu->fpstate_active) { 431 fpu__activate_curr(fpu); 432 user_fpu_begin(); 433 } 434 copy_init_fpstate_to_fpregs(); 435 } 436 } 437 438 /* 439 * x87 math exception handling: 440 */ 441 442 static inline unsigned short get_fpu_cwd(struct fpu *fpu) 443 { 444 if (cpu_has_fxsr) { 445 return fpu->state.fxsave.cwd; 446 } else { 447 return (unsigned short)fpu->state.fsave.cwd; 448 } 449 } 450 451 static inline unsigned short get_fpu_swd(struct fpu *fpu) 452 { 453 if (cpu_has_fxsr) { 454 return fpu->state.fxsave.swd; 455 } else { 456 return (unsigned short)fpu->state.fsave.swd; 457 } 458 } 459 460 static inline unsigned short get_fpu_mxcsr(struct fpu *fpu) 461 { 462 if (cpu_has_xmm) { 463 return fpu->state.fxsave.mxcsr; 464 } else { 465 return MXCSR_DEFAULT; 466 } 467 } 468 469 int fpu__exception_code(struct fpu *fpu, int trap_nr) 470 { 471 int err; 472 473 if (trap_nr == X86_TRAP_MF) { 474 unsigned short cwd, swd; 475 /* 476 * (~cwd & swd) will mask out exceptions that are not set to unmasked 477 * status. 0x3f is the exception bits in these regs, 0x200 is the 478 * C1 reg you need in case of a stack fault, 0x040 is the stack 479 * fault bit. We should only be taking one exception at a time, 480 * so if this combination doesn't produce any single exception, 481 * then we have a bad program that isn't synchronizing its FPU usage 482 * and it will suffer the consequences since we won't be able to 483 * fully reproduce the context of the exception 484 */ 485 cwd = get_fpu_cwd(fpu); 486 swd = get_fpu_swd(fpu); 487 488 err = swd & ~cwd; 489 } else { 490 /* 491 * The SIMD FPU exceptions are handled a little differently, as there 492 * is only a single status/control register. Thus, to determine which 493 * unmasked exception was caught we must mask the exception mask bits 494 * at 0x1f80, and then use these to mask the exception bits at 0x3f. 495 */ 496 unsigned short mxcsr = get_fpu_mxcsr(fpu); 497 err = ~(mxcsr >> 7) & mxcsr; 498 } 499 500 if (err & 0x001) { /* Invalid op */ 501 /* 502 * swd & 0x240 == 0x040: Stack Underflow 503 * swd & 0x240 == 0x240: Stack Overflow 504 * User must clear the SF bit (0x40) if set 505 */ 506 return FPE_FLTINV; 507 } else if (err & 0x004) { /* Divide by Zero */ 508 return FPE_FLTDIV; 509 } else if (err & 0x008) { /* Overflow */ 510 return FPE_FLTOVF; 511 } else if (err & 0x012) { /* Denormal, Underflow */ 512 return FPE_FLTUND; 513 } else if (err & 0x020) { /* Precision */ 514 return FPE_FLTRES; 515 } 516 517 /* 518 * If we're using IRQ 13, or supposedly even some trap 519 * X86_TRAP_MF implementations, it's possible 520 * we get a spurious trap, which is not an error. 521 */ 522 return 0; 523 } 524