xref: /freebsd/sys/amd64/amd64/fpu.c (revision 5ebc7e6281887681c3a348a5a4c902e262ccd656)
1 /*-
2  * Copyright (c) 1990 William Jolitz.
3  * Copyright (c) 1991 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed by the University of
17  *	California, Berkeley and its contributors.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	from: @(#)npx.c	7.2 (Berkeley) 5/12/91
35  *	$Id: npx.c,v 1.21 1995/03/05 04:06:44 wpaul Exp $
36  */
37 
38 #include "npx.h"
39 #if NNPX > 0
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/conf.h>
44 #include <sys/file.h>
45 #include <sys/proc.h>
46 #include <sys/devconf.h>
47 #include <sys/ioctl.h>
48 #include <sys/syslog.h>
49 #include <sys/signalvar.h>
50 
51 #include <machine/cpu.h>
52 #include <machine/pcb.h>
53 #include <machine/trap.h>
54 #include <machine/clock.h>
55 #include <machine/specialreg.h>
56 
57 #include <i386/isa/icu.h>
58 #include <i386/isa/isa_device.h>
59 #include <i386/isa/isa.h>
60 
61 /*
62  * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
63  */
64 
65 #ifdef	__GNUC__
66 
/*
 * Inline wrappers for the FPU and CR0 instructions used by this driver.
 *
 * The wrappers that store through `addr' (fnsave, fnstcw, fnstsw) are
 * marked volatile.  GCC treats an asm that has output operands and is not
 * volatile as a pure computation which it may delete or move, but these
 * must execute exactly where they are written: fnsave also reinitializes
 * the FPU, and all of them have to run between stop_emulating() and
 * start_emulating().  The remaining wrappers have no output operands and
 * are therefore implicitly volatile.
 *
 * start_emulating() sets CR0_TS through the machine status word;
 * stop_emulating() clears it with clts.
 */
#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
#define	fnclex()		__asm("fnclex")
#define	fninit()		__asm("fninit")
#define	fnop()			__asm("fnop")
#define	fnsave(addr)		__asm __volatile__("fnsave %0" : "=m" (*(addr)))
#define	fnstcw(addr)		__asm __volatile__("fnstcw %0" : "=m" (*(addr)))
#define	fnstsw(addr)		__asm __volatile__("fnstsw %0" : "=m" (*(addr)))
#define	fp_divide_by_0()	__asm("fldz; fld1; fdiv %st,%st(1); fnop")
#define	frstor(addr)		__asm("frstor %0" : : "m" (*(addr)))
#define	start_emulating()	__asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
				      : : "n" (CR0_TS) : "ax")
#define	stop_emulating()	__asm("clts")
79 
80 #else	/* not __GNUC__ */
81 
82 void	fldcw		__P((caddr_t addr));
83 void	fnclex		__P((void));
84 void	fninit		__P((void));
85 void	fnop		__P((void));
86 void	fnsave		__P((caddr_t addr));
87 void	fnstcw		__P((caddr_t addr));
88 void	fnstsw		__P((caddr_t addr));
89 void	fp_divide_by_0	__P((void));
90 void	frstor		__P((caddr_t addr));
91 void	start_emulating	__P((void));
92 void	stop_emulating	__P((void));
93 
94 #endif	/* __GNUC__ */
95 
96 typedef u_char bool_t;
97 
98 static	int	npxattach	__P((struct isa_device *dvp));
99 static	int	npxprobe	__P((struct isa_device *dvp));
100 static	int	npxprobe1	__P((struct isa_device *dvp));
101 
102 struct	isa_driver npxdriver = {
103 	npxprobe, npxattach, "npx",
104 };
105 
106 int	hw_float;		/* XXX currently just alias for npx_exists */
107 u_int	npx0_imask = SWI_CLOCK_MASK;
108 struct proc	*npxproc;
109 
110 static	bool_t			npx_ex16;
111 static	bool_t			npx_exists;
112 static	struct gate_descriptor	npx_idt_probeintr;
113 static	int			npx_intrno;
114 static	volatile u_int		npx_intrs_while_probing;
115 static	bool_t			npx_irq13;
116 static	volatile u_int		npx_traps_while_probing;
117 
118 /*
119  * Special interrupt handlers.  Someday intr0-intr15 will be used to count
120  * interrupts.  We'll still need a special exception 16 handler.  The busy
121  * latch stuff in probeintr() can be moved to npxprobe().
122  */
/*
 * Interrupt handler installed on the npx interrupt vector by npxprobe();
 * the resulting gate is also kept in npx_idt_probeintr and reinstalled
 * around the fnsave in npxsave().  It counts the interrupt in
 * npx_intrs_while_probing, sends an EOI to both interrupt controllers,
 * writes 0 to port 0xf0 to clear the BUSY# latch and returns.  %eax is
 * saved and restored around the port writes.
 *
 * NOTE(review): the `ss' prefix makes the increment go through %ss,
 * presumably because %ds cannot be trusted to be a kernel selector at
 * this point - confirm.
 */
inthand_t probeintr;
asm
("
	.text
_probeintr:
	ss
	incl	_npx_intrs_while_probing
	pushl	%eax
	movb	$0x20,%al	# EOI (asm in strings loses cpp features)
	outb	%al,$0xa0	# IO_ICU2
	outb	%al,$0x20	# IO_ICU1
	movb	$0,%al
	outb	%al,$0xf0	# clear BUSY# latch
	popl	%eax
	iret
");
139 
/*
 * Exception 16 handler installed by npxprobe() for the duration of the
 * probe.  It counts the trap in npx_traps_while_probing and executes
 * fnclex before returning.
 */
inthand_t probetrap;
asm
("
	.text
_probetrap:
	ss
	incl	_npx_traps_while_probing
	fnclex
	iret
");
150 
/*
 * Device-configuration records, NNPX of them.  Only element 0 is
 * initialized statically; npx_registerdev() copies it into the slot of
 * any other unit before filling in the per-unit fields.
 */
static struct kern_devconf kdc_npx[NNPX] = { {
	0, 0, 0,		/* filled in by dev_attach */
	"npx", 0, { MDDT_ISA, 0 },
	isa_generic_externalize, 0, 0, ISA_EXTERNALLEN,
	&kdc_isa0,		/* parent */
	0,			/* parentdata */
	DC_UNCONFIGURED,	/* state */
	"Floating-point unit",
	DC_CLS_MISC		/* class */
} };
161 
162 static inline void
163 npx_registerdev(struct isa_device *id)
164 {
165 	int	unit;
166 
167 	unit = id->id_unit;
168 	if (unit != 0)
169 		kdc_npx[unit] = kdc_npx[0];
170 	kdc_npx[unit].kdc_unit = unit;
171 	kdc_npx[unit].kdc_isa = id;
172 	dev_attach(&kdc_npx[unit]);
173 }
174 
/*
 * Probe routine.  Initialize cr0 to give correct behaviour for [f]wait
 * whether the device exists or not (XXX should be elsewhere).  Set flags
 * to tell npxattach() what to do.  Modify device struct if npx doesn't
 * need to use interrupts.
 *
 * Returns whatever npxprobe1() returns: IO_NPXSIZE when the npx has to
 * report errors through its IRQ, or -1 otherwise (exception 16 works, or
 * the probe failed and npxattach() is to announce the emulator instead).
 */
static int
npxprobe(dvp)
	struct isa_device *dvp;
{
	int	result;
	u_long	save_eflags;
	u_char	save_icu1_mask;
	u_char	save_icu2_mask;
	struct	gate_descriptor save_idt_npxintr;
	struct	gate_descriptor save_idt_npxtrap;
	/*
	 * This routine is now just a wrapper for npxprobe1(), to install
	 * special npx interrupt and trap handlers, to enable npx interrupts
	 * and to disable other interrupts.  Someday isa_configure() will
	 * install suitable handlers and run with interrupts enabled so we
	 * won't need to do so much here.
	 */
	npx_registerdev(dvp);
	/* id_irq is used as a bit mask; ffs() converts it to an IRQ number. */
	npx_intrno = NRSVIDT + ffs(dvp->id_irq) - 1;
	/* Save everything that is about to be changed. */
	save_eflags = read_eflags();
	disable_intr();
	save_icu1_mask = inb(IO_ICU1 + 1);
	save_icu2_mask = inb(IO_ICU2 + 1);
	save_idt_npxintr = idt[npx_intrno];
	save_idt_npxtrap = idt[16];
	/*
	 * Leave only the npx IRQ (and IRQ_SLAVE on the first ICU) unmasked
	 * and point exception 16 and the npx vector at the probe handlers.
	 */
	outb(IO_ICU1 + 1, ~(IRQ_SLAVE | dvp->id_irq));
	outb(IO_ICU2 + 1, ~(dvp->id_irq >> 8));
	setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL);
	setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL);
	/* Keep the probeintr gate; npxsave() reinstalls it temporarily. */
	npx_idt_probeintr = idt[npx_intrno];
	enable_intr();
	result = npxprobe1(dvp);
	/* Put the interrupt masks, IDT entries and eflags back. */
	disable_intr();
	outb(IO_ICU1 + 1, save_icu1_mask);
	outb(IO_ICU2 + 1, save_icu2_mask);
	idt[npx_intrno] = save_idt_npxintr;
	idt[16] = save_idt_npxtrap;
	write_eflags(save_eflags);
	return (result);
}
221 
/*
 * The real probe.  Runs with the probe handlers installed by npxprobe().
 *
 * Sets hw_float and npx_exists when a coprocessor answers with a sane
 * status and control word, then provokes a divide-by-zero error to find
 * out how errors are reported and sets npx_ex16 or npx_irq13 accordingly.
 *
 * Returns IO_NPXSIZE when errors arrive via IRQ13 (dvp->id_irq is kept
 * and added to npx0_imask).  Returns -1, with dvp->id_irq cleared, when
 * exception 16 works, when no error report was seen at all, or when no
 * coprocessor was found.
 */
static int
npxprobe1(dvp)
	struct isa_device *dvp;
{
	u_short control;
	u_short status;

	/*
	 * Partially reset the coprocessor, if any.  Some BIOS's don't reset
	 * it after a warm boot.
	 */
	outb(0xf1, 0);		/* full reset on some systems, NOP on others */
	outb(0xf0, 0);		/* clear BUSY# latch */
	/*
	 * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT
	 * instructions.  We must set the CR0_MP bit and use the CR0_TS
	 * bit to control the trap, because setting the CR0_EM bit does
	 * not cause WAIT instructions to trap.  It's important to trap
	 * WAIT instructions - otherwise the "wait" variants of no-wait
	 * control instructions would degenerate to the "no-wait" variants
	 * after FP context switches but work correctly otherwise.  It's
	 * particularly important to trap WAITs when there is no NPX -
	 * otherwise the "wait" variants would always degenerate.
	 *
	 * Try setting CR0_NE to get correct error reporting on 486DX's.
	 * Setting it should fail or do nothing on lesser processors.
	 */
	load_cr0(rcr0() | CR0_MP | CR0_NE);
	/*
	 * But don't trap while we're probing.
	 */
	stop_emulating();
	/*
	 * Finish resetting the coprocessor, if any.  If there is an error
	 * pending, then we may get a bogus IRQ13, but probeintr() will handle
	 * it OK.  Bogus halts have never been observed, but we enabled
	 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
	 */
	fninit();
	/*
	 * Don't use fwait here because it might hang.
	 * Don't use fnop here because it usually hangs if there is no FPU.
	 */
	DELAY(1000);		/* wait for any IRQ13 */
#ifdef DIAGNOSTIC
	if (npx_intrs_while_probing != 0)
		printf("fninit caused %u bogus npx interrupt(s)\n",
		       npx_intrs_while_probing);
	if (npx_traps_while_probing != 0)
		printf("fninit caused %u bogus npx trap(s)\n",
		       npx_traps_while_probing);
#endif
	/*
	 * Check for a status of mostly zero.  The 0x5a5a seed has bits set
	 * inside the tested mask, so the check also fails if fnstsw stores
	 * nothing.
	 */
	status = 0x5a5a;
	fnstsw(&status);
	if ((status & 0xb8ff) == 0) {
		/*
		 * Good, now check for a proper control word.  Again the
		 * seed cannot pass the test unless fnstcw overwrites it.
		 */
		control = 0x5a5a;
		fnstcw(&control);
		if ((control & 0x1f3f) == 0x033f) {
			hw_float = npx_exists = 1;
			/*
			 * We have an npx, now divide by 0 to see if exception
			 * 16 works.
			 */
			control &= ~(1 << 2);	/* enable divide by 0 trap */
			fldcw(&control);
			/* Forget anything counted during the reset above. */
			npx_traps_while_probing = npx_intrs_while_probing = 0;
			fp_divide_by_0();
			if (npx_traps_while_probing != 0) {
				/*
				 * Good, exception 16 works.
				 */
				npx_ex16 = 1;
				dvp->id_irq = 0;	/* zap the interrupt */
				/*
				 * special return value to flag that we do not
				 * actually use any I/O registers
				 */
				return (-1);
			}
			if (npx_intrs_while_probing != 0) {
				/*
				 * Bad, we are stuck with IRQ13.
				 */
				npx_irq13 = 1;
				/*
				 * npxattach would be too late to set npx0_imask.
				 */
				npx0_imask |= dvp->id_irq;
				return (IO_NPXSIZE);
			}
			/*
			 * Worse, even IRQ13 is broken.  Use emulator.
			 */
		}
	}
	/*
	 * Probe failed, but we want to get to npxattach to initialize the
	 * emulator and say that it has been installed.  XXX handle devices
	 * that aren't really devices better.
	 */
	dvp->id_irq = 0;
	/*
	 * special return value to flag that we do not
	 * actually use any I/O registers
	 */
	return (-1);
}
335 
336 /*
337  * Attach routine - announce which it is, and wire into system
338  */
339 int
340 npxattach(dvp)
341 	struct isa_device *dvp;
342 {
343 	if (npx_ex16)
344 		printf("npx%d: INT 16 interface\n", dvp->id_unit);
345 	else if (npx_irq13)
346 		;		/* higher level has printed "irq 13" */
347 #if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE)
348 	else if (npx_exists) {
349 		printf("npx%d: error reporting broken; using 387 emulator\n",
350 			dvp->id_unit);
351 		npx_exists = 0;
352 	} else
353 		printf("npx%d: 387 emulator\n",dvp->id_unit);
354 #else
355 	else
356 		printf("npx%d: no 387 emulator in kernel!\n", dvp->id_unit);
357 #endif
358 	npxinit(__INITIAL_NPXCW__);
359 	if (npx_exists) {
360 		kdc_npx[dvp->id_unit].kdc_state = DC_BUSY;
361 	}
362 	return (1);		/* XXX unused */
363 }
364 
/*
 * Initialize floating point unit.
 *
 * `control' is the control word to load into the freshly initialized
 * FPU.  Does nothing when there is no npx.  If curpcb is set, the
 * resulting state is then saved into curpcb->pcb_savefpu.  Returns with
 * emulation turned back on.
 */
void
npxinit(control)
	u_short control;
{
	struct save87 dummy;

	if (!npx_exists)
		return;
	/*
	 * fninit has the same h/w bugs as fnsave.  Use the detoxified
	 * fnsave to throw away any junk in the fpu.  npxsave() initializes
	 * the fpu and sets npxproc = NULL as important side effects.
	 */
	npxsave(&dummy);
	/* npxsave() left emulation on; turn it off for the FPU accesses. */
	stop_emulating();
	fldcw(&control);
	if (curpcb != NULL)
		fnsave(&curpcb->pcb_savefpu);
	start_emulating();
}
388 
389 /*
390  * Free coprocessor (if we have it).
391  */
392 void
393 npxexit(p)
394 	struct proc *p;
395 {
396 
397 	if (p == npxproc)
398 		npxsave(&curpcb->pcb_savefpu);
399 	if (npx_exists) {
400 		u_int	masked_exceptions;
401 
402 		masked_exceptions = curpcb->pcb_savefpu.sv_env.en_cw
403 				    & curpcb->pcb_savefpu.sv_env.en_sw & 0x7f;
404 		/*
405 		 * Overflow, divde by 0, and invalid operand would have
406 		 * caused a trap in 1.1.5.
407 		 */
408 		if (masked_exceptions & 0x0d)
409 			log(LOG_ERR,
410 	"pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
411 			    p->p_pid, p->p_comm, masked_exceptions);
412 	}
413 }
414 
415 /*
416  * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE.
417  *
418  * Clearing exceptions is necessary mainly to avoid IRQ13 bugs.  We now
419  * depend on longjmp() restoring a usable state.  Restoring the state
420  * or examining it might fail if we didn't clear exceptions.
421  *
422  * XXX there is no standard way to tell SIGFPE handlers about the error
423  * state.  The old interface:
424  *
425  *	void handler(int sig, int code, struct sigcontext *scp);
426  *
427  * is broken because it is non-ANSI and because the FP state is not in
428  * struct sigcontext.
429  *
430  * XXX the FP state is not preserved across signal handlers.  So signal
431  * handlers cannot afford to do FP unless they preserve the state or
432  * longjmp() out.  Both preserving the state and longjmp()ing may be
433  * destroyed by IRQ13 bugs.  Clearing FP exceptions is not an acceptable
434  * solution for signals other than SIGFPE.
435  */
/*
 * `frame' is the interrupt frame, passed by value; its if_cs tells
 * whether the interrupt came from user mode.  Panics if there is no npx,
 * no FPU owner, or the owner is not the current process.
 */
void
npxintr(frame)
	struct intrframe frame;
{
	int code;

	if (npxproc == NULL || !npx_exists) {
		printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n",
		       npxproc, curproc, npx_exists);
		panic("npxintr from nowhere");
	}
	if (npxproc != curproc) {
		printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n",
		       npxproc, curproc, npx_exists);
		panic("npxintr from non-current process");
	}

	outb(0xf0, 0);		/* clear BUSY# latch */
	/* Preserve the status word in the pcb before clearing exceptions. */
	fnstsw(&curpcb->pcb_savefpu.sv_ex_sw);
	fnclex();
	fnop();

	/*
	 * Pass exception to process.
	 */
	if (ISPL(frame.if_cs) == SEL_UPL) {
		/*
		 * Interrupt is essentially a trap, so we can afford to call
		 * the SIGFPE handler (if any) as soon as the interrupt
		 * returns.
		 *
		 * XXX little or nothing is gained from this, and plenty is
		 * lost - the interrupt frame has to contain the trap frame
		 * (this is otherwise only necessary for the rescheduling trap
		 * in doreti, and the frame for that could easily be set up
		 * just before it is used).
		 */
		curproc->p_md.md_regs = (int *)&frame.if_es;
#ifdef notyet
		/*
		 * Encode the appropriate code for detailed information on
		 * this exception.
		 */
		code = XXX_ENCODE(curpcb->pcb_savefpu.sv_ex_sw);
#else
		code = 0;	/* XXX */
#endif
		trapsignal(curproc, SIGFPE, code);
	} else {
		/*
		 * Nested interrupt.  These losers occur when:
		 *	o an IRQ13 is bogusly generated at a bogus time, e.g.:
		 *		o immediately after an fnsave or frstor of an
		 *		  error state.
		 *		o a couple of 386 instructions after
		 *		  "fstpl _memvar" causes a stack overflow.
		 *	  These are especially nasty when combined with a
		 *	  trace trap.
		 *	o an IRQ13 occurs at the same time as another higher-
		 *	  priority interrupt.
		 *
		 * Treat them like a true async interrupt.
		 */
		psignal(curproc, SIGFPE);
	}
}
502 
/*
 * Implement device not available (DNA) exception
 *
 * It would be better to switch FP context here (if curproc != npxproc)
 * and not necessarily for every context switch, but it is too hard to
 * access foreign pcb's.
 *
 * Returns 0 when there is no npx.  Otherwise makes the current process
 * the FPU owner, loads its saved state from the pcb and returns 1.
 * Panics if some process already owns the FPU.
 */
int
npxdna()
{
	if (!npx_exists)
		return (0);
	if (npxproc != NULL) {
		printf("npxdna: npxproc = %p, curproc = %p\n",
		       npxproc, curproc);
		panic("npxdna");
	}
	/* Stop trapping so that the frstor below can execute. */
	stop_emulating();
	/*
	 * Record new context early in case frstor causes an IRQ13.
	 */
	npxproc = curproc;
	curpcb->pcb_savefpu.sv_ex_sw = 0;
	/*
	 * The following frstor may cause an IRQ13 when the state being
	 * restored has a pending error.  The error will appear to have been
	 * triggered by the current (npx) user instruction even when that
	 * instruction is a no-wait instruction that should not trigger an
	 * error (e.g., fnclex).  On at least one 486 system all of the
	 * no-wait instructions are broken the same as frstor, so our
	 * treatment does not amplify the breakage.  On at least one
	 * 386/Cyrix 387 system, fnclex works correctly while frstor and
	 * fnsave are broken, so our treatment breaks fnclex if it is the
	 * first FPU instruction after a context switch.
	 */
	frstor(&curpcb->pcb_savefpu);

	return (1);
}
542 
/*
 * Wrapper for fnsave instruction to handle h/w bugs.  If there is an error
 * pending, then fnsave generates a bogus IRQ13 on some systems.  Force
 * any IRQ13 to be handled immediately, and then ignore it.  This routine is
 * often called at splhigh so it must not use many system services.  In
 * particular, it's much easier to install a special handler than to
 * guarantee that it's safe to use npxintr() and its supporting code.
 *
 * `addr' is where the FPU state is stored.  On return npxproc is NULL,
 * emulation is turned on and interrupts are enabled.
 */
void
npxsave(addr)
	struct save87 *addr;
{
	u_char	icu1_mask;
	u_char	icu2_mask;
	u_char	old_icu1_mask;
	u_char	old_icu2_mask;
	struct gate_descriptor	save_idt_npxintr;

	/*
	 * Unmask the interrupts in npx0_imask (plus IRQ_SLAVE on the first
	 * ICU) and route the npx vector to the probe handler, remembering
	 * the previous masks and IDT entry.
	 */
	disable_intr();
	old_icu1_mask = inb(IO_ICU1 + 1);
	old_icu2_mask = inb(IO_ICU2 + 1);
	save_idt_npxintr = idt[npx_intrno];
	outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask));
	outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8));
	idt[npx_intrno] = npx_idt_probeintr;
	enable_intr();
	/* Save the state with trapping off, then turn trapping back on. */
	stop_emulating();
	fnsave(addr);
	fnop();
	start_emulating();
	npxproc = NULL;
	/*
	 * Restore only the npx0_imask bits of the ICU masks to their old
	 * values; every other bit keeps whatever value it has now.
	 */
	disable_intr();
	icu1_mask = inb(IO_ICU1 + 1);	/* masks may have changed */
	icu2_mask = inb(IO_ICU2 + 1);
	outb(IO_ICU1 + 1,
	     (icu1_mask & ~npx0_imask) | (old_icu1_mask & npx0_imask));
	outb(IO_ICU2 + 1,
	     (icu2_mask & ~(npx0_imask >> 8))
	     | (old_icu2_mask & (npx0_imask >> 8)));
	idt[npx_intrno] = save_idt_npxintr;
	enable_intr();		/* back to usual state */
}
585 
586 #endif /* NNPX > 0 */
587