1 /*- 2 * Copyright (c) 1990 William Jolitz. 3 * Copyright (c) 1991 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by the University of 17 * California, Berkeley and its contributors. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)npx.c 7.2 (Berkeley) 5/12/91 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_debug_npx.h" 41 #include "opt_isa.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/bus.h> 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/malloc.h> 49 #include <sys/module.h> 50 #include <sys/mutex.h> 51 #include <sys/mutex.h> 52 #include <sys/proc.h> 53 #include <sys/sysctl.h> 54 #include <machine/bus.h> 55 #include <sys/rman.h> 56 #ifdef NPX_DEBUG 57 #include <sys/syslog.h> 58 #endif 59 #include <sys/signalvar.h> 60 #include <sys/user.h> 61 62 #include <machine/cputypes.h> 63 #include <machine/frame.h> 64 #include <machine/md_var.h> 65 #include <machine/pcb.h> 66 #include <machine/psl.h> 67 #include <machine/resource.h> 68 #include <machine/specialreg.h> 69 #include <machine/segments.h> 70 #include <machine/ucontext.h> 71 72 #include <amd64/isa/intr_machdep.h> 73 #ifdef DEV_ISA 74 #include <isa/isavar.h> 75 #endif 76 77 /* 78 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 
79 */ 80 81 #if defined(__GNUC__) && !defined(lint) 82 83 #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) 84 #define fnclex() __asm("fnclex") 85 #define fninit() __asm("fninit") 86 #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) 87 #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) 88 #define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) 89 #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) 90 #define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ 91 : : "n" (CR0_TS) : "ax") 92 #define stop_emulating() __asm("clts") 93 94 #else /* not __GNUC__ */ 95 96 void fldcw(caddr_t addr); 97 void fnclex(void); 98 void fninit(void); 99 void fnstcw(caddr_t addr); 100 void fnstsw(caddr_t addr); 101 void fxsave(caddr_t addr); 102 void fxrstor(caddr_t addr); 103 void start_emulating(void); 104 void stop_emulating(void); 105 106 #endif /* __GNUC__ */ 107 108 #define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_cw) 109 #define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_sw) 110 111 typedef u_char bool_t; 112 113 static int npx_attach(device_t dev); 114 static void npx_identify(driver_t *driver, device_t parent); 115 static int npx_probe(device_t dev); 116 117 int hw_float = 1; 118 SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, 119 CTLFLAG_RD, &hw_float, 0, 120 "Floatingpoint instructions executed in hardware"); 121 122 static struct savefpu npx_cleanstate; 123 static bool_t npx_cleanstate_ready; 124 125 /* 126 * Identify routine. Create a connection point on our parent for probing. 127 */ 128 static void 129 npx_identify(driver, parent) 130 driver_t *driver; 131 device_t parent; 132 { 133 device_t child; 134 135 child = BUS_ADD_CHILD(parent, 0, "npx", 0); 136 if (child == NULL) 137 panic("npx_identify"); 138 } 139 140 /* 141 * Probe routine. Initialize cr0 to give correct behaviour for [f]wait 142 * whether the device exists or not (XXX should be elsewhere). 
143 * Modify device struct if npx doesn't need to use interrupts. 144 * Return 0 if device exists. 145 */ 146 static int 147 npx_probe(dev) 148 device_t dev; 149 { 150 151 /* 152 * Partially reset the coprocessor, if any. Some BIOS's don't reset 153 * it after a warm boot. 154 */ 155 outb(0xf1, 0); /* full reset on some systems, NOP on others */ 156 outb(0xf0, 0); /* clear BUSY# latch */ 157 /* 158 * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT 159 * instructions. We must set the CR0_MP bit and use the CR0_TS 160 * bit to control the trap, because setting the CR0_EM bit does 161 * not cause WAIT instructions to trap. It's important to trap 162 * WAIT instructions - otherwise the "wait" variants of no-wait 163 * control instructions would degenerate to the "no-wait" variants 164 * after FP context switches but work correctly otherwise. It's 165 * particularly important to trap WAITs when there is no NPX - 166 * otherwise the "wait" variants would always degenerate. 167 * 168 * Try setting CR0_NE to get correct error reporting on 486DX's. 169 * Setting it should fail or do nothing on lesser processors. 170 */ 171 load_cr0(rcr0() | CR0_MP | CR0_NE); 172 /* 173 * But don't trap while we're probing. 174 */ 175 stop_emulating(); 176 /* 177 * Finish resetting the coprocessor. 178 */ 179 fninit(); 180 181 device_set_desc(dev, "math processor"); 182 183 return (0); 184 } 185 186 /* 187 * Attach routine - announce which it is, and wire into system 188 */ 189 static int 190 npx_attach(dev) 191 device_t dev; 192 { 193 register_t s; 194 195 device_printf(dev, "INT 16 interface\n"); 196 npxinit(__INITIAL_NPXCW__); 197 198 if (npx_cleanstate_ready == 0) { 199 s = intr_disable(); 200 stop_emulating(); 201 fxsave(&npx_cleanstate); 202 start_emulating(); 203 npx_cleanstate_ready = 1; 204 intr_restore(s); 205 } 206 return (0); /* XXX unused */ 207 } 208 209 /* 210 * Initialize floating point unit. 
211 */ 212 void 213 npxinit(control) 214 u_short control; 215 { 216 static struct savefpu dummy; 217 register_t savecrit; 218 219 /* 220 * fninit has the same h/w bugs as fnsave. Use the detoxified 221 * fnsave to throw away any junk in the fpu. npxsave() initializes 222 * the fpu and sets fpcurthread = NULL as important side effects. 223 */ 224 savecrit = intr_disable(); 225 npxsave(&dummy); 226 stop_emulating(); 227 /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */ 228 fninit(); 229 fldcw(&control); 230 start_emulating(); 231 intr_restore(savecrit); 232 } 233 234 /* 235 * Free coprocessor (if we have it). 236 */ 237 void 238 npxexit(td) 239 struct thread *td; 240 { 241 #ifdef NPX_DEBUG 242 u_int masked_exceptions; 243 #endif 244 register_t savecrit; 245 246 savecrit = intr_disable(); 247 if (curthread == PCPU_GET(fpcurthread)) 248 npxsave(&PCPU_GET(curpcb)->pcb_save); 249 intr_restore(savecrit); 250 #ifdef NPX_DEBUG 251 masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; 252 /* 253 * Log exceptions that would have trapped with the old 254 * control word (overflow, divide by 0, and invalid operand). 255 */ 256 if (masked_exceptions & 0x0d) 257 log(LOG_ERR, 258 "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", 259 td->td_proc->p_pid, td->td_proc->p_comm, 260 masked_exceptions); 261 #endif 262 } 263 264 int 265 npxformat() 266 { 267 268 return (_MC_FPFMT_XMM); 269 } 270 271 /* 272 * The following mechanism is used to ensure that the FPE_... value 273 * that is passed as a trapcode to the signal handler of the user 274 * process does not have more than one bit set. 275 * 276 * Multiple bits may be set if the user process modifies the control 277 * word while a status word bit is already set. While this is a sign 278 * of bad coding, we have no choise than to narrow them down to one 279 * bit, since we must not send a trapcode that is not exactly one of 280 * the FPE_ macros. 
*
 * The mechanism has a static table with 128 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The macro to choose one of these values does these steps: 1) Throw
 * away status word bits that cannot be masked.  2) Throw away the bits
 * currently masked in the control word, assuming the user isn't
 * interested in them anymore.  3) Reinsert status word bit 7 (stack
 * fault, mask 0x40) if it is set, which cannot be masked but must be
 * preserved.  4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 * 1  Invalid operation (FP_X_INV)
 * 1a   Stack underflow
 * 1b   Stack overflow
 * 1c   Operand of unsupported format
 * 1d   SNaN operand.
304 * 2 QNaN operand (not an exception, irrelavant here) 305 * 3 Any other invalid-operation not mentioned above or zero divide 306 * (FP_X_INV, FP_X_DZ) 307 * 4 Denormal operand (FP_X_DNML) 308 * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) 309 * 6 Inexact result (FP_X_IMP) 310 */ 311 static char fpetable[128] = { 312 0, 313 FPE_FLTINV, /* 1 - INV */ 314 FPE_FLTUND, /* 2 - DNML */ 315 FPE_FLTINV, /* 3 - INV | DNML */ 316 FPE_FLTDIV, /* 4 - DZ */ 317 FPE_FLTINV, /* 5 - INV | DZ */ 318 FPE_FLTDIV, /* 6 - DNML | DZ */ 319 FPE_FLTINV, /* 7 - INV | DNML | DZ */ 320 FPE_FLTOVF, /* 8 - OFL */ 321 FPE_FLTINV, /* 9 - INV | OFL */ 322 FPE_FLTUND, /* A - DNML | OFL */ 323 FPE_FLTINV, /* B - INV | DNML | OFL */ 324 FPE_FLTDIV, /* C - DZ | OFL */ 325 FPE_FLTINV, /* D - INV | DZ | OFL */ 326 FPE_FLTDIV, /* E - DNML | DZ | OFL */ 327 FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ 328 FPE_FLTUND, /* 10 - UFL */ 329 FPE_FLTINV, /* 11 - INV | UFL */ 330 FPE_FLTUND, /* 12 - DNML | UFL */ 331 FPE_FLTINV, /* 13 - INV | DNML | UFL */ 332 FPE_FLTDIV, /* 14 - DZ | UFL */ 333 FPE_FLTINV, /* 15 - INV | DZ | UFL */ 334 FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ 335 FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ 336 FPE_FLTOVF, /* 18 - OFL | UFL */ 337 FPE_FLTINV, /* 19 - INV | OFL | UFL */ 338 FPE_FLTUND, /* 1A - DNML | OFL | UFL */ 339 FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ 340 FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ 341 FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ 342 FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ 343 FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ 344 FPE_FLTRES, /* 20 - IMP */ 345 FPE_FLTINV, /* 21 - INV | IMP */ 346 FPE_FLTUND, /* 22 - DNML | IMP */ 347 FPE_FLTINV, /* 23 - INV | DNML | IMP */ 348 FPE_FLTDIV, /* 24 - DZ | IMP */ 349 FPE_FLTINV, /* 25 - INV | DZ | IMP */ 350 FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ 351 FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ 352 FPE_FLTOVF, /* 28 - OFL | IMP */ 353 FPE_FLTINV, /* 29 - INV | OFL | IMP */ 354 FPE_FLTUND, /* 2A - DNML | OFL 
| IMP */ 355 FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ 356 FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ 357 FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ 358 FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ 359 FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ 360 FPE_FLTUND, /* 30 - UFL | IMP */ 361 FPE_FLTINV, /* 31 - INV | UFL | IMP */ 362 FPE_FLTUND, /* 32 - DNML | UFL | IMP */ 363 FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ 364 FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ 365 FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ 366 FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ 367 FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ 368 FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ 369 FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ 370 FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ 371 FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ 372 FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ 373 FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ 374 FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ 375 FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ 376 FPE_FLTSUB, /* 40 - STK */ 377 FPE_FLTSUB, /* 41 - INV | STK */ 378 FPE_FLTUND, /* 42 - DNML | STK */ 379 FPE_FLTSUB, /* 43 - INV | DNML | STK */ 380 FPE_FLTDIV, /* 44 - DZ | STK */ 381 FPE_FLTSUB, /* 45 - INV | DZ | STK */ 382 FPE_FLTDIV, /* 46 - DNML | DZ | STK */ 383 FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ 384 FPE_FLTOVF, /* 48 - OFL | STK */ 385 FPE_FLTSUB, /* 49 - INV | OFL | STK */ 386 FPE_FLTUND, /* 4A - DNML | OFL | STK */ 387 FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ 388 FPE_FLTDIV, /* 4C - DZ | OFL | STK */ 389 FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ 390 FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ 391 FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ 392 FPE_FLTUND, /* 50 - UFL | STK */ 393 FPE_FLTSUB, /* 51 - INV | UFL | STK */ 394 FPE_FLTUND, /* 52 - DNML | UFL | STK */ 395 FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ 396 FPE_FLTDIV, /* 54 - DZ | UFL | STK */ 397 FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ 398 
FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ 399 FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ 400 FPE_FLTOVF, /* 58 - OFL | UFL | STK */ 401 FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ 402 FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ 403 FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ 404 FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ 405 FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ 406 FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ 407 FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ 408 FPE_FLTRES, /* 60 - IMP | STK */ 409 FPE_FLTSUB, /* 61 - INV | IMP | STK */ 410 FPE_FLTUND, /* 62 - DNML | IMP | STK */ 411 FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ 412 FPE_FLTDIV, /* 64 - DZ | IMP | STK */ 413 FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ 414 FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ 415 FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ 416 FPE_FLTOVF, /* 68 - OFL | IMP | STK */ 417 FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ 418 FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ 419 FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ 420 FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ 421 FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ 422 FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ 423 FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ 424 FPE_FLTUND, /* 70 - UFL | IMP | STK */ 425 FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ 426 FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ 427 FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ 428 FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ 429 FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ 430 FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ 431 FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ 432 FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ 433 FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ 434 FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ 435 FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ 436 FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ 437 
FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ 438 FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ 439 FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ 440 }; 441 442 /* 443 * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. 444 * 445 * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now 446 * depend on longjmp() restoring a usable state. Restoring the state 447 * or examining it might fail if we didn't clear exceptions. 448 * 449 * The error code chosen will be one of the FPE_... macros. It will be 450 * sent as the second argument to old BSD-style signal handlers and as 451 * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers. 452 * 453 * XXX the FP state is not preserved across signal handlers. So signal 454 * handlers cannot afford to do FP unless they preserve the state or 455 * longjmp() out. Both preserving the state and longjmp()ing may be 456 * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable 457 * solution for signals other than SIGFPE. 458 */ 459 int 460 npxtrap() 461 { 462 register_t savecrit; 463 u_short control, status; 464 465 savecrit = intr_disable(); 466 467 /* 468 * Interrupt handling (for another interrupt) may have pushed the 469 * state to memory. Fetch the relevant parts of the state from 470 * wherever they are. 471 */ 472 if (PCPU_GET(fpcurthread) != curthread) { 473 control = GET_FPU_CW(curthread); 474 status = GET_FPU_SW(curthread); 475 } else { 476 fnstcw(&control); 477 fnstsw(&status); 478 } 479 480 if (PCPU_GET(fpcurthread) == curthread) 481 fnclex(); 482 intr_restore(savecrit); 483 return (fpetable[status & ((~control & 0x3f) | 0x40)]); 484 } 485 486 /* 487 * Implement device not available (DNA) exception 488 * 489 * It would be better to switch FP context here (if curthread != fpcurthread) 490 * and not necessarily for every context switch, but it is too hard to 491 * access foreign pcb's. 
492 */ 493 494 static int err_count = 0; 495 496 int 497 npxdna() 498 { 499 struct pcb *pcb; 500 register_t s; 501 u_short control; 502 503 if (PCPU_GET(fpcurthread) == curthread) { 504 printf("npxdna: fpcurthread == curthread %d times\n", 505 ++err_count); 506 stop_emulating(); 507 return (1); 508 } 509 if (PCPU_GET(fpcurthread) != NULL) { 510 printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", 511 PCPU_GET(fpcurthread), 512 PCPU_GET(fpcurthread)->td_proc->p_pid, 513 curthread, curthread->td_proc->p_pid); 514 panic("npxdna"); 515 } 516 s = intr_disable(); 517 stop_emulating(); 518 /* 519 * Record new context early in case frstor causes an IRQ13. 520 */ 521 PCPU_SET(fpcurthread, curthread); 522 pcb = PCPU_GET(curpcb); 523 524 if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { 525 /* 526 * This is the first time this thread has used the FPU or 527 * the PCB doesn't contain a clean FPU state. Explicitly 528 * initialize the FPU and load the default control word. 529 */ 530 fninit(); 531 control = __INITIAL_NPXCW__; 532 fldcw(&control); 533 pcb->pcb_flags |= PCB_NPXINITDONE; 534 } else { 535 /* 536 * The following frstor may cause a trap when the state 537 * being restored has a pending error. The error will 538 * appear to have been triggered by the current (npx) user 539 * instruction even when that instruction is a no-wait 540 * instruction that should not trigger an error (e.g., 541 * instructions are broken the same as frstor, so our 542 * treatment does not amplify the breakage. 543 */ 544 fxrstor(&pcb->pcb_save); 545 } 546 intr_restore(s); 547 548 return (1); 549 } 550 551 /* 552 * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx 553 * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by 554 * no-wait npx instructions. See the Intel application note AP-578 for 555 * details. This doesn't cause any additional complications here. 
IRQ13's 556 * are inherently asynchronous unless the CPU is frozen to deliver them -- 557 * one that started in userland may be delivered many instructions later, 558 * after the process has entered the kernel. It may even be delivered after 559 * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in 560 * the same way as a very-late-arriving non-spurious IRQ13 from user mode: 561 * it is normally ignored at first because we set fpcurthread to NULL; it is 562 * normally retriggered in npxdna() after return to user mode. 563 * 564 * npxsave() must be called with interrupts disabled, so that it clears 565 * fpcurthread atomically with saving the state. We require callers to do the 566 * disabling, since most callers need to disable interrupts anyway to call 567 * npxsave() atomically with checking fpcurthread. 568 * 569 * A previous version of npxsave() went to great lengths to excecute fnsave 570 * with interrupts enabled in case executing it froze the CPU. This case 571 * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply 572 * spurious freezes. 573 */ 574 void 575 npxsave(addr) 576 struct savefpu *addr; 577 { 578 579 stop_emulating(); 580 fxsave(addr); 581 582 start_emulating(); 583 PCPU_SET(fpcurthread, NULL); 584 } 585 586 /* 587 * This should be called with interrupts disabled and only when the owning 588 * FPU thread is non-null. 589 */ 590 void 591 npxdrop() 592 { 593 struct thread *td; 594 595 td = PCPU_GET(fpcurthread); 596 PCPU_SET(fpcurthread, NULL); 597 td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; 598 start_emulating(); 599 } 600 601 /* 602 * Get the state of the FPU without dropping ownership (if possible). 603 * It returns the FPU ownership status. 
604 */ 605 int 606 npxgetregs(td, addr) 607 struct thread *td; 608 struct savefpu *addr; 609 { 610 register_t s; 611 612 if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) { 613 if (npx_cleanstate_ready) 614 bcopy(&npx_cleanstate, addr, sizeof(npx_cleanstate)); 615 else 616 bzero(addr, sizeof(*addr)); 617 return (_MC_FPOWNED_NONE); 618 } 619 s = intr_disable(); 620 if (td == PCPU_GET(fpcurthread)) { 621 fxsave(addr); 622 intr_restore(s); 623 return (_MC_FPOWNED_FPU); 624 } else { 625 intr_restore(s); 626 bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); 627 return (_MC_FPOWNED_PCB); 628 } 629 } 630 631 /* 632 * Set the state of the FPU. 633 */ 634 void 635 npxsetregs(td, addr) 636 struct thread *td; 637 struct savefpu *addr; 638 { 639 register_t s; 640 641 s = intr_disable(); 642 if (td == PCPU_GET(fpcurthread)) { 643 fxrstor(addr); 644 intr_restore(s); 645 } else { 646 intr_restore(s); 647 bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); 648 } 649 curthread->td_pcb->pcb_flags |= PCB_NPXINITDONE; 650 } 651 652 static device_method_t npx_methods[] = { 653 /* Device interface */ 654 DEVMETHOD(device_identify, npx_identify), 655 DEVMETHOD(device_probe, npx_probe), 656 DEVMETHOD(device_attach, npx_attach), 657 DEVMETHOD(device_detach, bus_generic_detach), 658 DEVMETHOD(device_shutdown, bus_generic_shutdown), 659 DEVMETHOD(device_suspend, bus_generic_suspend), 660 DEVMETHOD(device_resume, bus_generic_resume), 661 662 { 0, 0 } 663 }; 664 665 static driver_t npx_driver = { 666 "npx", 667 npx_methods, 668 1, /* no softc */ 669 }; 670 671 static devclass_t npx_devclass; 672 673 /* 674 * We prefer to attach to the root nexus so that the usual case (exception 16) 675 * doesn't describe the processor as being `on isa'. 676 */ 677 DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); 678 679 #ifdef DEV_ISA 680 /* 681 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. 
682 */ 683 static struct isa_pnp_id npxisa_ids[] = { 684 { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ 685 { 0 } 686 }; 687 688 static int 689 npxisa_probe(device_t dev) 690 { 691 int result; 692 if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { 693 device_quiet(dev); 694 } 695 return(result); 696 } 697 698 static int 699 npxisa_attach(device_t dev) 700 { 701 return (0); 702 } 703 704 static device_method_t npxisa_methods[] = { 705 /* Device interface */ 706 DEVMETHOD(device_probe, npxisa_probe), 707 DEVMETHOD(device_attach, npxisa_attach), 708 DEVMETHOD(device_detach, bus_generic_detach), 709 DEVMETHOD(device_shutdown, bus_generic_shutdown), 710 DEVMETHOD(device_suspend, bus_generic_suspend), 711 DEVMETHOD(device_resume, bus_generic_resume), 712 713 { 0, 0 } 714 }; 715 716 static driver_t npxisa_driver = { 717 "npxisa", 718 npxisa_methods, 719 1, /* no softc */ 720 }; 721 722 static devclass_t npxisa_devclass; 723 724 DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); 725 DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); 726 #endif /* DEV_ISA */ 727