1 /*- 2 * Copyright (c) 1990 William Jolitz. 3 * Copyright (c) 1991 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by the University of 17 * California, Berkeley and its contributors. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 35 * $FreeBSD$ 36 */ 37 38 #include "opt_debug_npx.h" 39 #include "opt_isa.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/bus.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/module.h> 48 #include <sys/mutex.h> 49 #include <sys/mutex.h> 50 #include <sys/proc.h> 51 #include <sys/sysctl.h> 52 #include <machine/bus.h> 53 #include <sys/rman.h> 54 #ifdef NPX_DEBUG 55 #include <sys/syslog.h> 56 #endif 57 #include <sys/signalvar.h> 58 #include <sys/user.h> 59 60 #include <machine/cputypes.h> 61 #include <machine/frame.h> 62 #include <machine/md_var.h> 63 #include <machine/pcb.h> 64 #include <machine/psl.h> 65 #include <machine/resource.h> 66 #include <machine/specialreg.h> 67 #include <machine/segments.h> 68 #include <machine/ucontext.h> 69 70 #include <amd64/isa/intr_machdep.h> 71 #ifdef DEV_ISA 72 #include <isa/isavar.h> 73 #endif 74 75 /* 76 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 
77 */ 78 79 #if defined(__GNUC__) && !defined(lint) 80 81 #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) 82 #define fnclex() __asm("fnclex") 83 #define fninit() __asm("fninit") 84 #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) 85 #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) 86 #define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) 87 #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) 88 #define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ 89 : : "n" (CR0_TS) : "ax") 90 #define stop_emulating() __asm("clts") 91 92 #else /* not __GNUC__ */ 93 94 void fldcw(caddr_t addr); 95 void fnclex(void); 96 void fninit(void); 97 void fnstcw(caddr_t addr); 98 void fnstsw(caddr_t addr); 99 void fxsave(caddr_t addr); 100 void fxrstor(caddr_t addr); 101 void start_emulating(void); 102 void stop_emulating(void); 103 104 #endif /* __GNUC__ */ 105 106 #define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_cw) 107 #define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_sw) 108 109 typedef u_char bool_t; 110 111 static int npx_attach(device_t dev); 112 static void npx_identify(driver_t *driver, device_t parent); 113 static int npx_probe(device_t dev); 114 115 int hw_float = 1; 116 SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, 117 CTLFLAG_RD, &hw_float, 0, 118 "Floatingpoint instructions executed in hardware"); 119 120 static struct savefpu npx_cleanstate; 121 static bool_t npx_cleanstate_ready; 122 123 /* 124 * Identify routine. Create a connection point on our parent for probing. 125 */ 126 static void 127 npx_identify(driver, parent) 128 driver_t *driver; 129 device_t parent; 130 { 131 device_t child; 132 133 child = BUS_ADD_CHILD(parent, 0, "npx", 0); 134 if (child == NULL) 135 panic("npx_identify"); 136 } 137 138 /* 139 * Probe routine. Initialize cr0 to give correct behaviour for [f]wait 140 * whether the device exists or not (XXX should be elsewhere). 
141 * Modify device struct if npx doesn't need to use interrupts. 142 * Return 0 if device exists. 143 */ 144 static int 145 npx_probe(dev) 146 device_t dev; 147 { 148 149 /* 150 * Partially reset the coprocessor, if any. Some BIOS's don't reset 151 * it after a warm boot. 152 */ 153 outb(0xf1, 0); /* full reset on some systems, NOP on others */ 154 outb(0xf0, 0); /* clear BUSY# latch */ 155 /* 156 * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT 157 * instructions. We must set the CR0_MP bit and use the CR0_TS 158 * bit to control the trap, because setting the CR0_EM bit does 159 * not cause WAIT instructions to trap. It's important to trap 160 * WAIT instructions - otherwise the "wait" variants of no-wait 161 * control instructions would degenerate to the "no-wait" variants 162 * after FP context switches but work correctly otherwise. It's 163 * particularly important to trap WAITs when there is no NPX - 164 * otherwise the "wait" variants would always degenerate. 165 * 166 * Try setting CR0_NE to get correct error reporting on 486DX's. 167 * Setting it should fail or do nothing on lesser processors. 168 */ 169 load_cr0(rcr0() | CR0_MP | CR0_NE); 170 /* 171 * But don't trap while we're probing. 172 */ 173 stop_emulating(); 174 /* 175 * Finish resetting the coprocessor. 176 */ 177 fninit(); 178 179 device_set_desc(dev, "math processor"); 180 181 return (0); 182 } 183 184 /* 185 * Attach routine - announce which it is, and wire into system 186 */ 187 static int 188 npx_attach(dev) 189 device_t dev; 190 { 191 register_t s; 192 193 device_printf(dev, "INT 16 interface\n"); 194 npxinit(__INITIAL_NPXCW__); 195 196 if (npx_cleanstate_ready == 0) { 197 s = intr_disable(); 198 stop_emulating(); 199 fxsave(&npx_cleanstate); 200 start_emulating(); 201 npx_cleanstate_ready = 1; 202 intr_restore(s); 203 } 204 return (0); /* XXX unused */ 205 } 206 207 /* 208 * Initialize floating point unit. 
209 */ 210 void 211 npxinit(control) 212 u_short control; 213 { 214 static struct savefpu dummy; 215 register_t savecrit; 216 217 /* 218 * fninit has the same h/w bugs as fnsave. Use the detoxified 219 * fnsave to throw away any junk in the fpu. npxsave() initializes 220 * the fpu and sets fpcurthread = NULL as important side effects. 221 */ 222 savecrit = intr_disable(); 223 npxsave(&dummy); 224 stop_emulating(); 225 /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */ 226 fninit(); 227 fldcw(&control); 228 start_emulating(); 229 intr_restore(savecrit); 230 } 231 232 /* 233 * Free coprocessor (if we have it). 234 */ 235 void 236 npxexit(td) 237 struct thread *td; 238 { 239 #ifdef NPX_DEBUG 240 u_int masked_exceptions; 241 #endif 242 register_t savecrit; 243 244 savecrit = intr_disable(); 245 if (curthread == PCPU_GET(fpcurthread)) 246 npxsave(&PCPU_GET(curpcb)->pcb_save); 247 intr_restore(savecrit); 248 #ifdef NPX_DEBUG 249 masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; 250 /* 251 * Log exceptions that would have trapped with the old 252 * control word (overflow, divide by 0, and invalid operand). 253 */ 254 if (masked_exceptions & 0x0d) 255 log(LOG_ERR, 256 "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", 257 td->td_proc->p_pid, td->td_proc->p_comm, 258 masked_exceptions); 259 #endif 260 } 261 262 int 263 npxformat() 264 { 265 266 return (_MC_FPFMT_XMM); 267 } 268 269 /* 270 * The following mechanism is used to ensure that the FPE_... value 271 * that is passed as a trapcode to the signal handler of the user 272 * process does not have more than one bit set. 273 * 274 * Multiple bits may be set if the user process modifies the control 275 * word while a status word bit is already set. While this is a sign 276 * of bad coding, we have no choise than to narrow them down to one 277 * bit, since we must not send a trapcode that is not exactly one of 278 * the FPE_ macros. 
*
 * The mechanism has a static table with 128 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The macro to choose one of these values does these steps: 1) Throw
 * away status word bits that cannot be masked.  2) Throw away the bits
 * currently masked in the control word, assuming the user isn't
 * interested in them anymore.  3) Reinsert status word bit 7 (stack
 * fault) if it is set, which cannot be masked but must be preserved.
 * 4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 * 1  Invalid operation (FP_X_INV)
 *	1a   Stack underflow
 *	1b   Stack overflow
 *	1c   Operand of unsupported format
 *	1d   SNaN operand.
302 * 2 QNaN operand (not an exception, irrelavant here) 303 * 3 Any other invalid-operation not mentioned above or zero divide 304 * (FP_X_INV, FP_X_DZ) 305 * 4 Denormal operand (FP_X_DNML) 306 * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) 307 * 6 Inexact result (FP_X_IMP) 308 */ 309 static char fpetable[128] = { 310 0, 311 FPE_FLTINV, /* 1 - INV */ 312 FPE_FLTUND, /* 2 - DNML */ 313 FPE_FLTINV, /* 3 - INV | DNML */ 314 FPE_FLTDIV, /* 4 - DZ */ 315 FPE_FLTINV, /* 5 - INV | DZ */ 316 FPE_FLTDIV, /* 6 - DNML | DZ */ 317 FPE_FLTINV, /* 7 - INV | DNML | DZ */ 318 FPE_FLTOVF, /* 8 - OFL */ 319 FPE_FLTINV, /* 9 - INV | OFL */ 320 FPE_FLTUND, /* A - DNML | OFL */ 321 FPE_FLTINV, /* B - INV | DNML | OFL */ 322 FPE_FLTDIV, /* C - DZ | OFL */ 323 FPE_FLTINV, /* D - INV | DZ | OFL */ 324 FPE_FLTDIV, /* E - DNML | DZ | OFL */ 325 FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ 326 FPE_FLTUND, /* 10 - UFL */ 327 FPE_FLTINV, /* 11 - INV | UFL */ 328 FPE_FLTUND, /* 12 - DNML | UFL */ 329 FPE_FLTINV, /* 13 - INV | DNML | UFL */ 330 FPE_FLTDIV, /* 14 - DZ | UFL */ 331 FPE_FLTINV, /* 15 - INV | DZ | UFL */ 332 FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ 333 FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ 334 FPE_FLTOVF, /* 18 - OFL | UFL */ 335 FPE_FLTINV, /* 19 - INV | OFL | UFL */ 336 FPE_FLTUND, /* 1A - DNML | OFL | UFL */ 337 FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ 338 FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ 339 FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ 340 FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ 341 FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ 342 FPE_FLTRES, /* 20 - IMP */ 343 FPE_FLTINV, /* 21 - INV | IMP */ 344 FPE_FLTUND, /* 22 - DNML | IMP */ 345 FPE_FLTINV, /* 23 - INV | DNML | IMP */ 346 FPE_FLTDIV, /* 24 - DZ | IMP */ 347 FPE_FLTINV, /* 25 - INV | DZ | IMP */ 348 FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ 349 FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ 350 FPE_FLTOVF, /* 28 - OFL | IMP */ 351 FPE_FLTINV, /* 29 - INV | OFL | IMP */ 352 FPE_FLTUND, /* 2A - DNML | OFL 
| IMP */ 353 FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ 354 FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ 355 FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ 356 FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ 357 FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ 358 FPE_FLTUND, /* 30 - UFL | IMP */ 359 FPE_FLTINV, /* 31 - INV | UFL | IMP */ 360 FPE_FLTUND, /* 32 - DNML | UFL | IMP */ 361 FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ 362 FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ 363 FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ 364 FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ 365 FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ 366 FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ 367 FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ 368 FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ 369 FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ 370 FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ 371 FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ 372 FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ 373 FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ 374 FPE_FLTSUB, /* 40 - STK */ 375 FPE_FLTSUB, /* 41 - INV | STK */ 376 FPE_FLTUND, /* 42 - DNML | STK */ 377 FPE_FLTSUB, /* 43 - INV | DNML | STK */ 378 FPE_FLTDIV, /* 44 - DZ | STK */ 379 FPE_FLTSUB, /* 45 - INV | DZ | STK */ 380 FPE_FLTDIV, /* 46 - DNML | DZ | STK */ 381 FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ 382 FPE_FLTOVF, /* 48 - OFL | STK */ 383 FPE_FLTSUB, /* 49 - INV | OFL | STK */ 384 FPE_FLTUND, /* 4A - DNML | OFL | STK */ 385 FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ 386 FPE_FLTDIV, /* 4C - DZ | OFL | STK */ 387 FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ 388 FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ 389 FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ 390 FPE_FLTUND, /* 50 - UFL | STK */ 391 FPE_FLTSUB, /* 51 - INV | UFL | STK */ 392 FPE_FLTUND, /* 52 - DNML | UFL | STK */ 393 FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ 394 FPE_FLTDIV, /* 54 - DZ | UFL | STK */ 395 FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ 396 
FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ 397 FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ 398 FPE_FLTOVF, /* 58 - OFL | UFL | STK */ 399 FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ 400 FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ 401 FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ 402 FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ 403 FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ 404 FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ 405 FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ 406 FPE_FLTRES, /* 60 - IMP | STK */ 407 FPE_FLTSUB, /* 61 - INV | IMP | STK */ 408 FPE_FLTUND, /* 62 - DNML | IMP | STK */ 409 FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ 410 FPE_FLTDIV, /* 64 - DZ | IMP | STK */ 411 FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ 412 FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ 413 FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ 414 FPE_FLTOVF, /* 68 - OFL | IMP | STK */ 415 FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ 416 FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ 417 FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ 418 FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ 419 FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ 420 FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ 421 FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ 422 FPE_FLTUND, /* 70 - UFL | IMP | STK */ 423 FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ 424 FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ 425 FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ 426 FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ 427 FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ 428 FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ 429 FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ 430 FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ 431 FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ 432 FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ 433 FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ 434 FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ 435 
FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ 436 FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ 437 FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ 438 }; 439 440 /* 441 * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. 442 * 443 * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now 444 * depend on longjmp() restoring a usable state. Restoring the state 445 * or examining it might fail if we didn't clear exceptions. 446 * 447 * The error code chosen will be one of the FPE_... macros. It will be 448 * sent as the second argument to old BSD-style signal handlers and as 449 * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers. 450 * 451 * XXX the FP state is not preserved across signal handlers. So signal 452 * handlers cannot afford to do FP unless they preserve the state or 453 * longjmp() out. Both preserving the state and longjmp()ing may be 454 * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable 455 * solution for signals other than SIGFPE. 456 */ 457 int 458 npxtrap() 459 { 460 register_t savecrit; 461 u_short control, status; 462 463 savecrit = intr_disable(); 464 465 /* 466 * Interrupt handling (for another interrupt) may have pushed the 467 * state to memory. Fetch the relevant parts of the state from 468 * wherever they are. 469 */ 470 if (PCPU_GET(fpcurthread) != curthread) { 471 control = GET_FPU_CW(curthread); 472 status = GET_FPU_SW(curthread); 473 } else { 474 fnstcw(&control); 475 fnstsw(&status); 476 } 477 478 if (PCPU_GET(fpcurthread) == curthread) 479 fnclex(); 480 intr_restore(savecrit); 481 return (fpetable[status & ((~control & 0x3f) | 0x40)]); 482 } 483 484 /* 485 * Implement device not available (DNA) exception 486 * 487 * It would be better to switch FP context here (if curthread != fpcurthread) 488 * and not necessarily for every context switch, but it is too hard to 489 * access foreign pcb's. 
490 */ 491 492 static int err_count = 0; 493 494 int 495 npxdna() 496 { 497 struct pcb *pcb; 498 register_t s; 499 u_short control; 500 501 if (PCPU_GET(fpcurthread) == curthread) { 502 printf("npxdna: fpcurthread == curthread %d times\n", 503 ++err_count); 504 stop_emulating(); 505 return (1); 506 } 507 if (PCPU_GET(fpcurthread) != NULL) { 508 printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", 509 PCPU_GET(fpcurthread), 510 PCPU_GET(fpcurthread)->td_proc->p_pid, 511 curthread, curthread->td_proc->p_pid); 512 panic("npxdna"); 513 } 514 s = intr_disable(); 515 stop_emulating(); 516 /* 517 * Record new context early in case frstor causes an IRQ13. 518 */ 519 PCPU_SET(fpcurthread, curthread); 520 pcb = PCPU_GET(curpcb); 521 522 if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { 523 /* 524 * This is the first time this thread has used the FPU or 525 * the PCB doesn't contain a clean FPU state. Explicitly 526 * initialize the FPU and load the default control word. 527 */ 528 fninit(); 529 control = __INITIAL_NPXCW__; 530 fldcw(&control); 531 pcb->pcb_flags |= PCB_NPXINITDONE; 532 } else { 533 /* 534 * The following frstor may cause a trap when the state 535 * being restored has a pending error. The error will 536 * appear to have been triggered by the current (npx) user 537 * instruction even when that instruction is a no-wait 538 * instruction that should not trigger an error (e.g., 539 * instructions are broken the same as frstor, so our 540 * treatment does not amplify the breakage. 541 */ 542 fxrstor(&pcb->pcb_save); 543 } 544 intr_restore(s); 545 546 return (1); 547 } 548 549 /* 550 * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx 551 * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by 552 * no-wait npx instructions. See the Intel application note AP-578 for 553 * details. This doesn't cause any additional complications here. 
IRQ13's 554 * are inherently asynchronous unless the CPU is frozen to deliver them -- 555 * one that started in userland may be delivered many instructions later, 556 * after the process has entered the kernel. It may even be delivered after 557 * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in 558 * the same way as a very-late-arriving non-spurious IRQ13 from user mode: 559 * it is normally ignored at first because we set fpcurthread to NULL; it is 560 * normally retriggered in npxdna() after return to user mode. 561 * 562 * npxsave() must be called with interrupts disabled, so that it clears 563 * fpcurthread atomically with saving the state. We require callers to do the 564 * disabling, since most callers need to disable interrupts anyway to call 565 * npxsave() atomically with checking fpcurthread. 566 * 567 * A previous version of npxsave() went to great lengths to excecute fnsave 568 * with interrupts enabled in case executing it froze the CPU. This case 569 * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply 570 * spurious freezes. 571 */ 572 void 573 npxsave(addr) 574 struct savefpu *addr; 575 { 576 577 stop_emulating(); 578 fxsave(addr); 579 580 start_emulating(); 581 PCPU_SET(fpcurthread, NULL); 582 } 583 584 /* 585 * This should be called with interrupts disabled and only when the owning 586 * FPU thread is non-null. 587 */ 588 void 589 npxdrop() 590 { 591 struct thread *td; 592 593 td = PCPU_GET(fpcurthread); 594 PCPU_SET(fpcurthread, NULL); 595 td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; 596 start_emulating(); 597 } 598 599 /* 600 * Get the state of the FPU without dropping ownership (if possible). 601 * It returns the FPU ownership status. 
602 */ 603 int 604 npxgetregs(td, addr) 605 struct thread *td; 606 struct savefpu *addr; 607 { 608 register_t s; 609 610 if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) { 611 if (npx_cleanstate_ready) 612 bcopy(&npx_cleanstate, addr, sizeof(npx_cleanstate)); 613 else 614 bzero(addr, sizeof(*addr)); 615 return (_MC_FPOWNED_NONE); 616 } 617 s = intr_disable(); 618 if (td == PCPU_GET(fpcurthread)) { 619 fxsave(addr); 620 intr_restore(s); 621 return (_MC_FPOWNED_FPU); 622 } else { 623 intr_restore(s); 624 bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); 625 return (_MC_FPOWNED_PCB); 626 } 627 } 628 629 /* 630 * Set the state of the FPU. 631 */ 632 void 633 npxsetregs(td, addr) 634 struct thread *td; 635 struct savefpu *addr; 636 { 637 register_t s; 638 639 s = intr_disable(); 640 if (td == PCPU_GET(fpcurthread)) { 641 fxrstor(addr); 642 intr_restore(s); 643 } else { 644 intr_restore(s); 645 bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); 646 } 647 curthread->td_pcb->pcb_flags |= PCB_NPXINITDONE; 648 } 649 650 static device_method_t npx_methods[] = { 651 /* Device interface */ 652 DEVMETHOD(device_identify, npx_identify), 653 DEVMETHOD(device_probe, npx_probe), 654 DEVMETHOD(device_attach, npx_attach), 655 DEVMETHOD(device_detach, bus_generic_detach), 656 DEVMETHOD(device_shutdown, bus_generic_shutdown), 657 DEVMETHOD(device_suspend, bus_generic_suspend), 658 DEVMETHOD(device_resume, bus_generic_resume), 659 660 { 0, 0 } 661 }; 662 663 static driver_t npx_driver = { 664 "npx", 665 npx_methods, 666 1, /* no softc */ 667 }; 668 669 static devclass_t npx_devclass; 670 671 /* 672 * We prefer to attach to the root nexus so that the usual case (exception 16) 673 * doesn't describe the processor as being `on isa'. 674 */ 675 DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); 676 677 #ifdef DEV_ISA 678 /* 679 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. 
680 */ 681 static struct isa_pnp_id npxisa_ids[] = { 682 { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ 683 { 0 } 684 }; 685 686 static int 687 npxisa_probe(device_t dev) 688 { 689 int result; 690 if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { 691 device_quiet(dev); 692 } 693 return(result); 694 } 695 696 static int 697 npxisa_attach(device_t dev) 698 { 699 return (0); 700 } 701 702 static device_method_t npxisa_methods[] = { 703 /* Device interface */ 704 DEVMETHOD(device_probe, npxisa_probe), 705 DEVMETHOD(device_attach, npxisa_attach), 706 DEVMETHOD(device_detach, bus_generic_detach), 707 DEVMETHOD(device_shutdown, bus_generic_shutdown), 708 DEVMETHOD(device_suspend, bus_generic_suspend), 709 DEVMETHOD(device_resume, bus_generic_resume), 710 711 { 0, 0 } 712 }; 713 714 static driver_t npxisa_driver = { 715 "npxisa", 716 npxisa_methods, 717 1, /* no softc */ 718 }; 719 720 static devclass_t npxisa_devclass; 721 722 DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); 723 DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); 724 #endif /* DEV_ISA */ 725