xref: /freebsd/sys/amd64/amd64/fpu.c (revision e627b39baccd1ec9129690167cf5e6d860509655)
1 /*-
2  * Copyright (c) 1990 William Jolitz.
3  * Copyright (c) 1991 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed by the University of
17  *	California, Berkeley and its contributors.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	from: @(#)npx.c	7.2 (Berkeley) 5/12/91
35  *	$Id: npx.c,v 1.30 1996/06/25 20:30:38 bde Exp $
36  */
37 
38 #include "npx.h"
39 #if NNPX > 0
40 
41 #include "opt_math_emulate.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/sysctl.h>
47 #include <sys/conf.h>
48 #include <sys/file.h>
49 #include <sys/proc.h>
50 #include <sys/ioctl.h>
51 #include <sys/syslog.h>
52 #include <sys/signalvar.h>
53 
54 #include <machine/cpu.h>
55 #include <machine/pcb.h>
56 #include <machine/md_var.h>
57 #include <machine/trap.h>
58 #include <machine/clock.h>
59 #include <machine/specialreg.h>
60 
61 #include <i386/isa/icu.h>
62 #include <i386/isa/isa_device.h>
63 #include <i386/isa/isa.h>
64 
65 /*
66  * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
67  */
68 
69 #ifdef	__GNUC__
70 
   /*
    * One-instruction wrappers, expanded inline by GCC.  The macros that take
    * `addr' dereference it and use the result as the instruction's memory
    * operand.
    *
    *	fldcw, fnstcw		load / store the FPU control word
    *	fnstsw			store the FPU status word
    *	fnsave, frstor		save / restore the FPU state at addr
    *	fnclex, fninit, fnop	issue the instruction of the same name
    *	fp_divide_by_0		push 0 and 1 and divide; npxprobe1() uses it
    *				to provoke a divide-by-zero error on purpose
    *	start_emulating		OR CR0_TS into the machine status word
    *	stop_emulating		execute clts
    */
71 #define	fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
72 #define	fnclex()		__asm("fnclex")
73 #define	fninit()		__asm("fninit")
74 #define	fnop()			__asm("fnop")
75 #define	fnsave(addr)		__asm("fnsave %0" : "=m" (*(addr)))
76 #define	fnstcw(addr)		__asm("fnstcw %0" : "=m" (*(addr)))
77 #define	fnstsw(addr)		__asm("fnstsw %0" : "=m" (*(addr)))
78 #define	fp_divide_by_0()	__asm("fldz; fld1; fdiv %st,%st(1); fnop")
79 #define	frstor(addr)		__asm("frstor %0" : : "m" (*(addr)))
80 #define	start_emulating()	__asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
81 				      : : "n" (CR0_TS) : "ax")
82 #define	stop_emulating()	__asm("clts")
83 
84 #else	/* not __GNUC__ */
85 
   /*
    * Other compilers get plain function declarations for the same
    * operations.  No definitions for them appear in this part of the file.
    */
86 void	fldcw		__P((caddr_t addr));
87 void	fnclex		__P((void));
88 void	fninit		__P((void));
89 void	fnop		__P((void));
90 void	fnsave		__P((caddr_t addr));
91 void	fnstcw		__P((caddr_t addr));
92 void	fnstsw		__P((caddr_t addr));
93 void	fp_divide_by_0	__P((void));
94 void	frstor		__P((caddr_t addr));
95 void	start_emulating	__P((void));
96 void	stop_emulating	__P((void));
97 
98 #endif	/* __GNUC__ */
99 
100 typedef u_char bool_t;
101 
102 static	int	npxattach	__P((struct isa_device *dvp));
103 static	int	npxprobe	__P((struct isa_device *dvp));
104 static	int	npxprobe1	__P((struct isa_device *dvp));
105 
    /* Driver entry: probe routine, attach routine and device name. */
106 struct	isa_driver npxdriver = {
107 	npxprobe, npxattach, "npx",
108 };
109 
110 int	hw_float;		/* XXX currently just alias for npx_exists */
111 
    /* Read-only sysctl exporting hw_float. */
112 SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint,
113 	CTLFLAG_RD, &hw_float, 0,
114 	"Floatingpoint instructions executed in hardware");
115 
    /*
     * npx0_imask: interrupt mask bits used by npxsave() when it adjusts the
     * ICU masks; npxprobe1() ORs the npx irq bit into it when IRQ13 is used.
     * npxproc: process whose state is currently loaded in the FPU.  Set to
     * curproc by npxdna() and reset to NULL by npxsave().
     */
116 static u_int	npx0_imask = SWI_CLOCK_MASK;
117 struct proc	*npxproc;
118 
    /*
     * Probe results and probe-time bookkeeping:
     *	npx_ex16		exception 16 was delivered during the probe
     *	npx_exists		status/control words looked like a real FPU
     *	npx_idt_probeintr	copy of the IDT gate for probeintr, reused by
     *				npxsave()
     *	npx_intrno		IDT index of the npx interrupt
     *	npx_intrs_while_probing	count incremented by probeintr
     *	npx_irq13		errors are reported through the npx irq
     *	npx_traps_while_probing	count incremented by probetrap
     */
119 static	bool_t			npx_ex16;
120 static	bool_t			npx_exists;
121 static	struct gate_descriptor	npx_idt_probeintr;
122 static	int			npx_intrno;
123 static	volatile u_int		npx_intrs_while_probing;
124 static	bool_t			npx_irq13;
125 static	volatile u_int		npx_traps_while_probing;
126 
127 /*
128  * Special interrupt handlers.  Someday intr0-intr15 will be used to count
129  * interrupts.  We'll still need a special exception 16 handler.  The busy
130  * latch stuff in probeintr() can be moved to npxprobe().
     *
     * probeintr is the minimal npx interrupt handler.  It is installed by
     * npxprobe() for the duration of the probe and temporarily by npxsave().
     * It counts the interrupt in npx_intrs_while_probing, sends EOI (0x20)
     * to both interrupt controllers, writes 0 to port 0xf0 to clear the
     * BUSY# latch and returns with iret.  Only %eax is used and it is saved
     * and restored around the port writes.  The `ss' prefix makes the
     * counter increment use the stack segment; presumably this is because
     * the data segment registers are not reloaded on entry (to be
     * confirmed).
131  */
132 inthand_t probeintr;
133 asm
134 ("
135 	.text
136 _probeintr:
137 	ss
138 	incl	_npx_intrs_while_probing
139 	pushl	%eax
140 	movb	$0x20,%al	# EOI (asm in strings loses cpp features)
141 	outb	%al,$0xa0	# IO_ICU2
142 	outb	%al,$0x20	# IO_ICU1
143 	movb	$0,%al
144 	outb	%al,$0xf0	# clear BUSY# latch
145 	popl	%eax
146 	iret
147 ");
148 
    /*
     * probetrap is the minimal exception 16 handler that npxprobe() installs
     * while probing.  It counts the trap in npx_traps_while_probing, clears
     * the pending FPU exceptions with fnclex and returns with iret.  No
     * registers are touched.
     */
149 inthand_t probetrap;
150 asm
151 ("
152 	.text
153 _probetrap:
154 	ss
155 	incl	_npx_traps_while_probing
156 	fnclex
157 	iret
158 ");
159 
160 /*
161  * Probe routine.  Initialize cr0 to give correct behaviour for [f]wait
162  * whether the device exists or not (XXX should be elsewhere).  Set flags
163  * to tell npxattach() what to do.  Modify device struct if npx doesn't
164  * need to use interrupts.  Return 1 if device exists.
     *
     * dvp: ISA device description.  dvp->id_irq is read here to choose the
     * IDT slot and the ICU masks, and may be cleared by npxprobe1().
     *
     * The value actually returned is whatever npxprobe1() returns (-1 or
     * IO_NPXSIZE in the code below).  The interrupt flag, both ICU masks and
     * the two IDT entries are restored to their entry values before
     * returning.
165  */
166 static int
167 npxprobe(dvp)
168 	struct isa_device *dvp;
169 {
170 	int	result;
171 	u_long	save_eflags;
172 	u_char	save_icu1_mask;
173 	u_char	save_icu2_mask;
174 	struct	gate_descriptor save_idt_npxintr;
175 	struct	gate_descriptor save_idt_npxtrap;
176 	/*
177 	 * This routine is now just a wrapper for npxprobe1(), to install
178 	 * special npx interrupt and trap handlers, to enable npx interrupts
179 	 * and to disable other interrupts.  Someday isa_configure() will
180 	 * install suitable handlers and run with interrupts enabled so we
181 	 * won't need to do so much here.
182 	 */
    	/*
    	 * IDT slot of the npx interrupt: NRSVIDT plus the zero-based index
    	 * of the lowest set bit in id_irq.
    	 */
183 	npx_intrno = NRSVIDT + ffs(dvp->id_irq) - 1;
    	/* Remember everything that is about to be changed. */
184 	save_eflags = read_eflags();
185 	disable_intr();
186 	save_icu1_mask = inb(IO_ICU1 + 1);
187 	save_icu2_mask = inb(IO_ICU2 + 1);
188 	save_idt_npxintr = idt[npx_intrno];
189 	save_idt_npxtrap = idt[16];
    	/*
    	 * New masks have zero bits only for IRQ_SLAVE and the npx irq; the
    	 * low byte goes to the first ICU and the high byte to the second.
    	 */
190 	outb(IO_ICU1 + 1, ~(IRQ_SLAVE | dvp->id_irq));
191 	outb(IO_ICU2 + 1, ~(dvp->id_irq >> 8));
    	/* Route exception 16 and the npx interrupt to the probe handlers. */
192 	setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
193 	setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
    	/* Keep a copy of the probeintr gate; npxsave() installs it later. */
194 	npx_idt_probeintr = idt[npx_intrno];
195 	enable_intr();
196 	result = npxprobe1(dvp);
    	/* Undo all of the above in the same protected way. */
197 	disable_intr();
198 	outb(IO_ICU1 + 1, save_icu1_mask);
199 	outb(IO_ICU2 + 1, save_icu2_mask);
200 	idt[npx_intrno] = save_idt_npxintr;
201 	idt[16] = save_idt_npxtrap;
202 	write_eflags(save_eflags);
203 	return (result);
204 }
205 
    /*
     * Do the actual detection.  Called from npxprobe() with the probe
     * handlers installed and interrupts enabled.
     *
     * dvp: ISA device description.  dvp->id_irq is cleared unless the npx
     * irq ends up being used for error reporting.
     *
     * Returns -1 when exception 16 works and also when the probe fails (no
     * FPU, or an FPU with no working error reporting).  Returns IO_NPXSIZE
     * when errors are reported through the npx irq.
     *
     * Side effects: sets CR0_MP and CR0_NE in cr0, executes clts, and sets
     * hw_float, npx_exists, npx_ex16, npx_irq13 and npx0_imask as described
     * at each step below.
     */
206 static int
207 npxprobe1(dvp)
208 	struct isa_device *dvp;
209 {
210 	u_short control;
211 	u_short status;
212 
213 	/*
214 	 * Partially reset the coprocessor, if any.  Some BIOS's don't reset
215 	 * it after a warm boot.
216 	 */
217 	outb(0xf1, 0);		/* full reset on some systems, NOP on others */
218 	outb(0xf0, 0);		/* clear BUSY# latch */
219 	/*
220 	 * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT
221 	 * instructions.  We must set the CR0_MP bit and use the CR0_TS
222 	 * bit to control the trap, because setting the CR0_EM bit does
223 	 * not cause WAIT instructions to trap.  It's important to trap
224 	 * WAIT instructions - otherwise the "wait" variants of no-wait
225 	 * control instructions would degenerate to the "no-wait" variants
226 	 * after FP context switches but work correctly otherwise.  It's
227 	 * particularly important to trap WAITs when there is no NPX -
228 	 * otherwise the "wait" variants would always degenerate.
229 	 *
230 	 * Try setting CR0_NE to get correct error reporting on 486DX's.
231 	 * Setting it should fail or do nothing on lesser processors.
232 	 */
233 	load_cr0(rcr0() | CR0_MP | CR0_NE);
234 	/*
235 	 * But don't trap while we're probing.
236 	 */
237 	stop_emulating();
238 	/*
239 	 * Finish resetting the coprocessor, if any.  If there is an error
240 	 * pending, then we may get a bogus IRQ13, but probeintr() will handle
241 	 * it OK.  Bogus halts have never been observed, but we enabled
242 	 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
243 	 */
244 	fninit();
245 	/*
246 	 * Don't use fwait here because it might hang.
247 	 * Don't use fnop here because it usually hangs if there is no FPU.
248 	 */
249 	DELAY(1000);		/* wait for any IRQ13 */
250 #ifdef DIAGNOSTIC
251 	if (npx_intrs_while_probing != 0)
252 		printf("fninit caused %u bogus npx interrupt(s)\n",
253 		       npx_intrs_while_probing);
254 	if (npx_traps_while_probing != 0)
255 		printf("fninit caused %u bogus npx trap(s)\n",
256 		       npx_traps_while_probing);
257 #endif
258 	/*
259 	 * Check for a status of mostly zero.
    	 *
    	 * The variable is preset to 0x5a5a, which fails the test below, so
    	 * the test can only pass if fnstsw really stored a value.
260 	 */
261 	status = 0x5a5a;
262 	fnstsw(&status);
263 	if ((status & 0xb8ff) == 0) {
264 		/*
265 		 * Good, now check for a proper control word.
    		 *
    		 * Same trick: the 0x5a5a preset does not match 0x033f under
    		 * the 0x1f3f mask, so fnstcw must have stored a value.
266 		 */
267 		control = 0x5a5a;
268 		fnstcw(&control);
269 		if ((control & 0x1f3f) == 0x033f) {
270 			hw_float = npx_exists = 1;
271 			/*
272 			 * We have an npx, now divide by 0 to see if exception
273 			 * 16 works.
274 			 */
275 			control &= ~(1 << 2);	/* enable divide by 0 trap */
276 			fldcw(&control);
    			/* Start counting from zero for this experiment. */
277 			npx_traps_while_probing = npx_intrs_while_probing = 0;
278 			fp_divide_by_0();
279 			if (npx_traps_while_probing != 0) {
280 				/*
281 				 * Good, exception 16 works.
282 				 */
283 				npx_ex16 = 1;
284 				dvp->id_irq = 0;	/* zap the interrupt */
285 				/*
286 				 * special return value to flag that we do not
287 				 * actually use any I/O registers
288 				 */
289 				return (-1);
290 			}
291 			if (npx_intrs_while_probing != 0) {
292 				/*
293 				 * Bad, we are stuck with IRQ13.
294 				 */
295 				npx_irq13 = 1;
296 				/*
297 				 * npxattach would be too late to set npx0_imask.
298 				 */
299 				npx0_imask |= dvp->id_irq;
300 				return (IO_NPXSIZE);
301 			}
302 			/*
303 			 * Worse, even IRQ13 is broken.  Use emulator.
    			 * npx_exists stays set here; npxattach() decides
    			 * what to do with it.
304 			 */
305 		}
306 	}
307 	/*
308 	 * Probe failed, but we want to get to npxattach to initialize the
309 	 * emulator and say that it has been installed.  XXX handle devices
310 	 * that aren't really devices better.
311 	 */
312 	dvp->id_irq = 0;
313 	/*
314 	 * special return value to flag that we do not
315 	 * actually use any I/O registers
316 	 */
317 	return (-1);
318 }
319 
320 /*
321  * Attach routine - announce which it is, and wire into system
     *
     * dvp: ISA device description; only dvp->id_unit is used, for messages.
     *
     * Reports the error-reporting interface chosen by the probe (npx_ex16,
     * npx_irq13) or the emulator situation, then calls npxinit() with
     * __INITIAL_NPXCW__.  Always returns 1.
     *
     * When an emulator is configured and the FPU exists but neither
     * reporting method worked, npx_exists is cleared so that the rest of
     * this file treats the FPU as absent.
     *
     * NOTE(review): without an emulator configured, that same case prints
     * "no 387 emulator in kernel!" and leaves npx_exists set -- confirm
     * that this is intended.
322  */
323 int
324 npxattach(dvp)
325 	struct isa_device *dvp;
326 {
327 	if (npx_ex16)
328 		printf("npx%d: INT 16 interface\n", dvp->id_unit);
329 	else if (npx_irq13)
330 		;		/* higher level has printed "irq 13" */
331 #if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE)
332 	else if (npx_exists) {
333 		printf("npx%d: error reporting broken; using 387 emulator\n",
334 			dvp->id_unit);
335 		npx_exists = 0;
336 	} else
337 		printf("npx%d: 387 emulator\n",dvp->id_unit);
338 #else
339 	else
340 		printf("npx%d: no 387 emulator in kernel!\n", dvp->id_unit);
341 #endif
342 	npxinit(__INITIAL_NPXCW__);
343 	return (1);		/* XXX unused */
344 }
345 
346 /*
347  * Initialize floating point unit.
     *
     * control: control word to load into the FPU.
     *
     * Does nothing when npx_exists is zero.  Otherwise the current FPU
     * contents are discarded, the control word is loaded, the resulting
     * state is stored in curpcb->pcb_savefpu (when curpcb is not NULL) and
     * CR0_TS is set again through start_emulating().
348  */
349 void
350 npxinit(control)
351 	u_short control;
352 {
353 	struct save87 dummy;
354 
355 	if (!npx_exists)
356 		return;
357 	/*
358 	 * fninit has the same h/w bugs as fnsave.  Use the detoxified
359 	 * fnsave to throw away any junk in the fpu.  npxsave() initializes
360 	 * the fpu and sets npxproc = NULL as important side effects.
361 	 */
362 	npxsave(&dummy);
    	/* npxsave() left CR0_TS set; clear it before touching the FPU. */
363 	stop_emulating();
364 	fldcw(&control);
    	/* Record the freshly initialized state as this context's state. */
365 	if (curpcb != NULL)
366 		fnsave(&curpcb->pcb_savefpu);
367 	start_emulating();
368 }
369 
370 /*
371  * Free coprocessor (if we have it).
     *
     * p: the exiting process.
     *
     * If p is the process whose state is in the FPU (npxproc), the state is
     * saved into curpcb->pcb_savefpu, which also resets npxproc to NULL.
     * When an FPU exists, the saved control and status words are then
     * checked and a message is logged if the process is exiting with
     * certain exception flags set.
     *
     * NOTE(review): the saved state is read from curpcb, not from a pcb
     * looked up through p; this assumes p is the current process.
372  */
373 void
374 npxexit(p)
375 	struct proc *p;
376 {
377 
378 	if (p == npxproc)
379 		npxsave(&curpcb->pcb_savefpu);
380 	if (npx_exists) {
381 		u_int	masked_exceptions;
382 
    		/*
    		 * Bits set in both the control word and the status word,
    		 * limited to the low seven bits.
    		 */
383 		masked_exceptions = curpcb->pcb_savefpu.sv_env.en_cw
384 				    & curpcb->pcb_savefpu.sv_env.en_sw & 0x7f;
385 		/*
386 		 * Overflow, divide by 0, and invalid operand would have
387 		 * caused a trap in 1.1.5.
388 		 */
389 		if (masked_exceptions & 0x0d)
390 			log(LOG_ERR,
391 	"pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
392 			    p->p_pid, p->p_comm, masked_exceptions);
393 	}
394 }
395 
396 /*
397  * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE.
398  *
399  * Clearing exceptions is necessary mainly to avoid IRQ13 bugs.  We now
400  * depend on longjmp() restoring a usable state.  Restoring the state
401  * or examining it might fail if we didn't clear exceptions.
402  *
403  * XXX there is no standard way to tell SIGFPE handlers about the error
404  * state.  The old interface:
405  *
406  *	void handler(int sig, int code, struct sigcontext *scp);
407  *
408  * is broken because it is non-ANSI and because the FP state is not in
409  * struct sigcontext.
410  *
411  * XXX the FP state is not preserved across signal handlers.  So signal
412  * handlers cannot afford to do FP unless they preserve the state or
413  * longjmp() out.  Both preserving the state and longjmp()ing may be
414  * destroyed by IRQ13 bugs.  Clearing FP exceptions is not an acceptable
415  * solution for signals other than SIGFPE.
     *
     * unit: first argument of the handler.  Its value is not used; its
     * address is reinterpreted as the start of the interrupt frame.
     *
     * Panics if there is no FPU owner, no FPU, or the owner is not the
     * current process.
416  */
417 void
418 npxintr(unit)
419 	int unit;
420 {
421 	int code;
422 	struct intrframe *frame;
423 
424 	if (npxproc == NULL || !npx_exists) {
425 		printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n",
426 		       npxproc, curproc, npx_exists);
427 		panic("npxintr from nowhere");
428 	}
429 	if (npxproc != curproc) {
430 		printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n",
431 		       npxproc, curproc, npx_exists);
432 		panic("npxintr from non-current process");
433 	}
434 
    	/*
    	 * Clear the BUSY# latch, keep a copy of the status word for later
    	 * inspection, then clear the exception flags in the FPU.
    	 */
435 	outb(0xf0, 0);
436 	fnstsw(&curpcb->pcb_savefpu.sv_ex_sw);
437 	fnclex();
438 	fnop();
439 
440 	/*
441 	 * Pass exception to process.
442 	 */
443 	frame = (struct intrframe *)&unit;	/* XXX */
    	/* Was user-privilege code interrupted? */
444 	if (ISPL(frame->if_cs) == SEL_UPL) {
445 		/*
446 		 * Interrupt is essentially a trap, so we can afford to call
447 		 * the SIGFPE handler (if any) as soon as the interrupt
448 		 * returns.
449 		 *
450 		 * XXX little or nothing is gained from this, and plenty is
451 		 * lost - the interrupt frame has to contain the trap frame
452 		 * (this is otherwise only necessary for the rescheduling trap
453 		 * in doreti, and the frame for that could easily be set up
454 		 * just before it is used).
455 		 */
456 		curproc->p_md.md_regs = &frame->if_es;
457 #ifdef notyet
458 		/*
459 		 * Encode the appropriate code for detailed information on
460 		 * this exception.
461 		 */
462 		code = XXX_ENCODE(curpcb->pcb_savefpu.sv_ex_sw);
463 #else
464 		code = 0;	/* XXX */
465 #endif
466 		trapsignal(curproc, SIGFPE, code);
467 	} else {
468 		/*
469 		 * Nested interrupt.  These losers occur when:
470 		 *	o an IRQ13 is bogusly generated at a bogus time, e.g.:
471 		 *		o immediately after an fnsave or frstor of an
472 		 *		  error state.
473 		 *		o a couple of 386 instructions after
474 		 *		  "fstpl _memvar" causes a stack overflow.
475 		 *	  These are especially nasty when combined with a
476 		 *	  trace trap.
477 		 *	o an IRQ13 occurs at the same time as another higher-
478 		 *	  priority interrupt.
479 		 *
480 		 * Treat them like a true async interrupt.
481 		 */
482 		psignal(curproc, SIGFPE);
483 	}
484 }
485 
486 /*
487  * Implement device not available (DNA) exception
488  *
489  * It would be better to switch FP context here (if curproc != npxproc)
490  * and not necessarily for every context switch, but it is too hard to
491  * access foreign pcb's.
     *
     * Returns 0 without doing anything when there is no FPU, and 1 after
     * the current process has been made the FPU owner and its saved state
     * has been loaded.  Panics if some process already owns the FPU.
492  */
493 int
494 npxdna()
495 {
496 	if (!npx_exists)
497 		return (0);
498 	if (npxproc != NULL) {
499 		printf("npxdna: npxproc = %p, curproc = %p\n",
500 		       npxproc, curproc);
501 		panic("npxdna");
502 	}
    	/* Clear CR0_TS so the FPU instructions below do not trap again. */
503 	stop_emulating();
504 	/*
505 	 * Record new context early in case frstor causes an IRQ13.
506 	 */
507 	npxproc = curproc;
    	/* No exception status has been captured for this owner yet. */
508 	curpcb->pcb_savefpu.sv_ex_sw = 0;
509 	/*
510 	 * The following frstor may cause an IRQ13 when the state being
511 	 * restored has a pending error.  The error will appear to have been
512 	 * triggered by the current (npx) user instruction even when that
513 	 * instruction is a no-wait instruction that should not trigger an
514 	 * error (e.g., fnclex).  On at least one 486 system all of the
515 	 * no-wait instructions are broken the same as frstor, so our
516 	 * treatment does not amplify the breakage.  On at least one
517 	 * 386/Cyrix 387 system, fnclex works correctly while frstor and
518 	 * fnsave are broken, so our treatment breaks fnclex if it is the
519 	 * first FPU instruction after a context switch.
520 	 */
521 	frstor(&curpcb->pcb_savefpu);
522 
523 	return (1);
524 }
525 
526 /*
527  * Wrapper for fnsave instruction to handle h/w bugs.  If there is an error
528  * pending, then fnsave generates a bogus IRQ13 on some systems.  Force
529  * any IRQ13 to be handled immediately, and then ignore it.  This routine is
530  * often called at splhigh so it must not use many system services.  In
531  * particular, it's much easier to install a special handler than to
532  * guarantee that it's safe to use npxintr() and its supporting code.
     *
     * addr: save area that receives the FPU state.
     *
     * On return npxproc is NULL, CR0_TS has been set again through
     * start_emulating(), the npx IDT entry is back to its entry value and
     * interrupts are enabled.
533  */
534 void
535 npxsave(addr)
536 	struct save87 *addr;
537 {
538 	u_char	icu1_mask;
539 	u_char	icu2_mask;
540 	u_char	old_icu1_mask;
541 	u_char	old_icu2_mask;
542 	struct gate_descriptor	save_idt_npxintr;
543 
    	/*
    	 * With interrupts off, remember the ICU masks and the npx IDT entry,
    	 * clear the IRQ_SLAVE and npx0_imask bits in the masks and point the
    	 * npx vector at the saved probeintr gate.
    	 */
544 	disable_intr();
545 	old_icu1_mask = inb(IO_ICU1 + 1);
546 	old_icu2_mask = inb(IO_ICU2 + 1);
547 	save_idt_npxintr = idt[npx_intrno];
548 	outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask));
549 	outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8));
550 	idt[npx_intrno] = npx_idt_probeintr;
    	/*
    	 * Interrupts are enabled around the save so that any npx interrupt
    	 * it raises is taken right here, by probeintr.
    	 */
551 	enable_intr();
552 	stop_emulating();
553 	fnsave(addr);
554 	fnop();
555 	start_emulating();
556 	npxproc = NULL;
557 	disable_intr();
    	/*
    	 * Put back only the npx0_imask bits from the saved masks; all other
    	 * bits keep whatever value they have now.
    	 */
558 	icu1_mask = inb(IO_ICU1 + 1);	/* masks may have changed */
559 	icu2_mask = inb(IO_ICU2 + 1);
560 	outb(IO_ICU1 + 1,
561 	     (icu1_mask & ~npx0_imask) | (old_icu1_mask & npx0_imask));
562 	outb(IO_ICU2 + 1,
563 	     (icu2_mask & ~(npx0_imask >> 8))
564 	     | (old_icu2_mask & (npx0_imask >> 8)));
565 	idt[npx_intrno] = save_idt_npxintr;
566 	enable_intr();		/* back to usual state */
567 }
568 
569 #endif /* NNPX > 0 */
570