xref: /titanic_50/usr/src/uts/intel/ia32/os/fpu.c (revision e6eb57e72471348376359efe9105d50bf487a312)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
28 /*		All Rights Reserved				*/
29 
30 /*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
31 /*		All Rights Reserved				*/
32 
33 #pragma ident	"%Z%%M%	%I%	%E% SMI"
34 
35 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/signal.h>
38 #include <sys/regset.h>
39 #include <sys/privregs.h>
40 #include <sys/psw.h>
41 #include <sys/trap.h>
42 #include <sys/fault.h>
43 #include <sys/systm.h>
44 #include <sys/user.h>
45 #include <sys/file.h>
46 #include <sys/proc.h>
47 #include <sys/pcb.h>
48 #include <sys/lwp.h>
49 #include <sys/cpuvar.h>
50 #include <sys/thread.h>
51 #include <sys/disp.h>
52 #include <sys/fp.h>
53 #include <sys/siginfo.h>
54 #include <sys/archsystm.h>
55 #include <sys/kmem.h>
56 #include <sys/debug.h>
57 #include <sys/x86_archext.h>
58 #include <sys/sysmacros.h>
59 
/*CSTYLED*/
#pragma	align 16 (sse_initial)

/*
 * Initial kfpu state for SSE/SSE2 used by fpinit()
 *
 * fp_new_lwp() also copies this image into a new lwp's fxsave area before
 * overriding fx_fcw and fx_mxcsr with the parent's control state.
 *
 * The initializer is positional, so the leading fields up to fx_mxcsr
 * must be listed in structure order; the layout of the instruction and
 * data pointer fields differs between amd64 and i386.  The #pragma above
 * requests 16-byte alignment for the object.
 */
const struct fxsave_state sse_initial = {
	FPU_CW_INIT,	/* fx_fcw */
	0,		/* fx_fsw */
	0,		/* fx_fctw */
	0,		/* fx_fop */
#if defined(__amd64)
	0,		/* fx_rip */
	0,		/* fx_rdp */
#else
	0,		/* fx_eip */
	0,		/* fx_cs */
	0,		/* __fx_ign0 */
	0,		/* fx_dp */
	0,		/* fx_ds */
	0,		/* __fx_ign1 */
#endif /* __amd64 */
	SSE_MXCSR_INIT	/* fx_mxcsr */
	/* rest of structure is zero */
};
85 
/*
 * mxcsr_mask value (possibly reset in fpu_probe); used to avoid
 * the #gp exception caused by setting unsupported bits in the
 * MXCSR register
 *
 * fpsetcw() ANDs the caller-supplied MXCSR value with this mask before
 * storing it into the saved context.
 */
uint32_t sse_mxcsr_mask = SSE_MXCSR_MASK_DEFAULT;
92 
/*
 * Initial kfpu state for x87 used by fpinit()
 *
 * On i386 kernels without SSE, fp_new_lwp() copies this image into a new
 * lwp's fnsave area before overriding f_fcw with the parent's control
 * word.  The initializer is positional; only the leading fields are
 * listed.
 */
const struct fnsave_state x87_initial = {
	FPU_CW_INIT,	/* f_fcw */
	0,		/* __f_ign0 */
	0,		/* f_fsw */
	0,		/* __f_ign1 */
	0xffff,		/* f_ftw: all-ones tag word (x87 "empty" tags) */
	/* rest of structure is zero */
};
104 
/*
 * fpsave_ctxt is the routine handed to installctx() by fp_new_lwp() and
 * fp_seed().  On amd64 it is bound at compile time to fpxsave_ctxt; on
 * i386 it is a function pointer that starts out as the x87 variant.
 */
#if defined(__amd64)
#define	fpsave_ctxt	fpxsave_ctxt
#elif defined(__i386)
/*
 * This vector is patched to fpxsave_ctxt() if we discover
 * we have an SSE-capable chip in fpu_probe().
 */
void (*fpsave_ctxt)(void *) = fpnsave_ctxt;
#endif
114 
/*
 * Forward declarations of the status-word to si_code mapping helpers
 * defined later in this file (x87 and SSE flavours respectively).
 */
static int fpe_sicode(uint_t);
static int fpe_simd_sicode(uint_t);
117 
118 /*
119  * Copy the state of parent lwp's floating point context into the new lwp.
120  * Invoked for both fork() and lwp_create().
121  *
122  * Note that we inherit -only- the control state (e.g. exception masks,
123  * rounding, precision control, etc.); the FPU registers are otherwise
124  * reset to their initial state.
125  */
126 static void
127 fp_new_lwp(kthread_id_t t, kthread_id_t ct)
128 {
129 	struct fpu_ctx *fp;		/* parent fpu context */
130 	struct fpu_ctx *cfp;		/* new fpu context */
131 	struct fxsave_state *fx, *cfx;
132 
133 	ASSERT(fp_kind != FP_NO);
134 
135 	fp = &t->t_lwp->lwp_pcb.pcb_fpu;
136 	cfp = &ct->t_lwp->lwp_pcb.pcb_fpu;
137 
138 	/*
139 	 * If the parent FPU state is still in the FPU hw then save it;
140 	 * conveniently, fp_save() already does this for us nicely.
141 	 */
142 	fp_save(fp);
143 
144 	cfp->fpu_flags = FPU_EN | FPU_VALID;
145 	cfp->fpu_regs.kfpu_status = 0;
146 	cfp->fpu_regs.kfpu_xstatus = 0;
147 
148 #if defined(__amd64)
149 	fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
150 	cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx;
151 	bcopy(&sse_initial, cfx, sizeof (*cfx));
152 	cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
153 	cfx->fx_fcw = fx->fx_fcw;
154 #else
155 	if (fp_kind == __FP_SSE) {
156 		fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
157 		cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx;
158 		bcopy(&sse_initial, cfx, sizeof (*cfx));
159 		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
160 		cfx->fx_fcw = fx->fx_fcw;
161 	} else {
162 		struct fnsave_state *fn = &fp->fpu_regs.kfpu_u.kfpu_fn;
163 		struct fnsave_state *cfn = &cfp->fpu_regs.kfpu_u.kfpu_fn;
164 
165 		bcopy(&x87_initial, cfn, sizeof (*cfn));
166 		cfn->f_fcw = fn->f_fcw;
167 	}
168 #endif
169 	installctx(ct, cfp,
170 	    fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
171 	/*
172 	 * Now, when the new lwp starts running, it will take a trap
173 	 * that will be handled inline in the trap table to cause
174 	 * the appropriate f*rstor instruction to load the save area we
175 	 * constructed above directly into the hardware.
176 	 */
177 }
178 
179 /*
180  * Free any state associated with floating point context.
181  * Fp_free can be called in three cases:
182  * 1) from reaper -> thread_free -> ctxfree -> fp_free
183  *	fp context belongs to a thread on deathrow
184  *	nothing to do,  thread will never be resumed
185  *	thread calling ctxfree is reaper
186  *
187  * 2) from exec -> ctxfree -> fp_free
188  *	fp context belongs to the current thread
189  *	must disable fpu, thread calling ctxfree is curthread
190  *
191  * 3) from restorecontext -> setfpregs -> fp_free
192  *	we have a modified context in the memory (lwp->pcb_fpu)
193  *	disable fpu and release the fp context for the CPU
194  *
195  */
196 /*ARGSUSED*/
197 void
198 fp_free(struct fpu_ctx *fp, int isexec)
199 {
200 	ASSERT(fp_kind != FP_NO);
201 
202 	if (fp->fpu_flags & FPU_VALID)
203 		return;
204 
205 	kpreempt_disable();
206 	/*
207 	 * We want to do fpsave rather than fpdisable so that we can
208 	 * keep the fpu_flags as FPU_VALID tracking the CR0_TS bit
209 	 */
210 	fp->fpu_flags |= FPU_VALID;
211 	/* If for current thread disable FP to track FPU_VALID */
212 	if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) {
213 		/* Clear errors if any to prevent frstor from complaining */
214 		(void) fperr_reset();
215 		if (fp_kind == __FP_SSE)
216 			(void) fpxerr_reset();
217 		fpdisable();
218 	}
219 	kpreempt_enable();
220 }
221 
222 /*
223  * Store the floating point state and disable the floating point unit.
224  */
225 void
226 fp_save(struct fpu_ctx *fp)
227 {
228 	ASSERT(fp_kind != FP_NO);
229 
230 	kpreempt_disable();
231 	if (!fp || fp->fpu_flags & FPU_VALID) {
232 		kpreempt_enable();
233 		return;
234 	}
235 	ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu);
236 
237 #if defined(__amd64)
238 	fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx);
239 #else
240 	switch (fp_kind) {
241 	case __FP_SSE:
242 		fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx);
243 		break;
244 	default:
245 		fpsave(&fp->fpu_regs.kfpu_u.kfpu_fn);
246 		break;
247 	}
248 #endif
249 	fp->fpu_flags |= FPU_VALID;
250 	kpreempt_enable();
251 }
252 
253 /*
254  * Restore the FPU context for the thread:
255  * The possibilities are:
256  *	1. No active FPU context: Load the new context into the FPU hw
257  *	   and enable the FPU.
258  */
259 void
260 fp_restore(struct fpu_ctx *fp)
261 {
262 #if defined(__amd64)
263 	fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx);
264 #else
265 	/* case 2 */
266 	if (fp_kind == __FP_SSE)
267 		fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx);
268 	else
269 		fprestore(&fp->fpu_regs.kfpu_u.kfpu_fn);
270 #endif
271 	fp->fpu_flags &= ~FPU_VALID;
272 }
273 
274 
275 /*
276  * Seeds the initial state for the current thread.  The possibilities are:
277  *      1. Another process has modified the FPU state before we have done any
278  *         initialization: Load the FPU state from the LWP state.
279  *      2. The FPU state has not been externally modified:  Load a clean state.
280  */
281 static void
282 fp_seed(void)
283 {
284 	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
285 
286 	ASSERT(curthread->t_preempt >= 1);
287 	ASSERT((fp->fpu_flags & FPU_EN) == 0);
288 
289 	/*
290 	 * Always initialize a new context and initialize the hardware.
291 	 */
292 	installctx(curthread, fp,
293 	    fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
294 	fpinit();
295 
296 	/*
297 	 * If FPU_VALID is set, it means someone has modified registers via
298 	 * /proc.  In this case, restore the current lwp's state.
299 	 */
300 	if (fp->fpu_flags & FPU_VALID)
301 		fp_restore(fp);
302 
303 	ASSERT((fp->fpu_flags & FPU_VALID) == 0);
304 	fp->fpu_flags = FPU_EN;
305 }
306 
307 /*
308  * This routine is called from trap() when User thread takes No Extension
309  * Fault. The possiblities are:
310  *	1. User thread has executed a FP instruction for the first time.
311  *	   Save current FPU context if any. Initialize FPU, setup FPU
312  *	   context for the thread and enable FP hw.
313  *	2. Thread's pcb has a valid FPU state: Restore the FPU state and
314  *	   enable FP hw.
315  *
316  * Note that case #2 is inlined in the trap table.
317  */
318 int
319 fpnoextflt(struct regs *rp)
320 {
321 	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
322 
323 #if !defined(__lint)
324 	ASSERT(sizeof (struct fxsave_state) == 512 &&
325 	    sizeof (struct fnsave_state) == 108);
326 	ASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0);
327 #if defined(__i386)
328 	ASSERT(sizeof (struct fpu) == sizeof (struct __old_fpu));
329 #endif	/* __i386 */
330 #endif	/* !__lint */
331 
332 	/*
333 	 * save area MUST be 16-byte aligned, else will page fault
334 	 */
335 	ASSERT(((uintptr_t)(&fp->fpu_regs.kfpu_u.kfpu_fx) & 0xf) == 0);
336 
337 	kpreempt_disable();
338 	/*
339 	 * Now we can enable the interrupts.
340 	 * (NOTE: fp-no-coprocessor comes thru interrupt gate)
341 	 */
342 	sti();
343 
344 	if (!fpu_exists) { /* check for FPU hw exists */
345 		if (fp_kind == FP_NO) {
346 			uint32_t inst;
347 
348 			/*
349 			 * When the system has no floating point support,
350 			 * i.e. no FP hardware and no emulator, skip the
351 			 * two kinds of FP instruction that occur in
352 			 * fpstart.  Allows processes that do no real FP
353 			 * to run normally.
354 			 */
355 			if (fuword32((void *)rp->r_pc, &inst) != -1 &&
356 			    ((inst & 0xFFFF) == 0x7dd9 ||
357 			    (inst & 0xFFFF) == 0x6dd9)) {
358 				rp->r_pc += 3;
359 				kpreempt_enable();
360 				return (0);
361 			}
362 		}
363 
364 		/*
365 		 * If we have neither a processor extension nor
366 		 * an emulator, kill the process OR panic the kernel.
367 		 */
368 		kpreempt_enable();
369 		return (1); /* error */
370 	}
371 
372 #if !defined(__xpv)	/* XXPV	Is this ifdef needed now? */
373 	/*
374 	 * A paranoid cross-check: for the SSE case, ensure that %cr4 is
375 	 * configured to enable fully fledged (%xmm) fxsave/fxrestor on
376 	 * this CPU.  For the non-SSE case, ensure that it isn't.
377 	 */
378 	ASSERT((fp_kind == __FP_SSE && (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) ||
379 	    (fp_kind != __FP_SSE &&
380 	    (getcr4() & (CR4_OSXMMEXCPT|CR4_OSFXSR)) == 0));
381 #endif
382 
383 	if (fp->fpu_flags & FPU_EN) {
384 		/* case 2 */
385 		fp_restore(fp);
386 	} else {
387 		/* case 1 */
388 		fp_seed();
389 	}
390 	kpreempt_enable();
391 	return (0);
392 }
393 
394 
395 /*
396  * Handle a processor extension overrun fault
397  * Returns non zero for error.
398  *
399  * XXX	Shouldn't this just be abolished given that we're not supporting
400  *	anything prior to Pentium?
401  */
402 
403 /* ARGSUSED */
404 int
405 fpextovrflt(struct regs *rp)
406 {
407 #if !defined(__xpv)		/* XXPV	Do we need this ifdef either */
408 	ulong_t cur_cr0;
409 
410 	ASSERT(fp_kind != FP_NO);
411 
412 	cur_cr0 = getcr0();
413 	fpinit();		/* initialize the FPU hardware */
414 	setcr0(cur_cr0);
415 #endif
416 	sti();
417 	return (1); 		/* error, send SIGSEGV signal to the thread */
418 }
419 
420 /*
421  * Handle a processor extension error fault
422  * Returns non zero for error.
423  */
424 
425 /*ARGSUSED*/
426 int
427 fpexterrflt(struct regs *rp)
428 {
429 	uint32_t fpcw, fpsw;
430 	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
431 
432 	ASSERT(fp_kind != FP_NO);
433 
434 	/*
435 	 * Now we can enable the interrupts.
436 	 * (NOTE: x87 fp exceptions come thru interrupt gate)
437 	 */
438 	sti();
439 
440 	if (!fpu_exists)
441 		return (FPE_FLTINV);
442 
443 	/*
444 	 * Do an unconditional save of the FP state.  If it's dirty (TS=0),
445 	 * it'll be saved into the fpu context area passed in (that of the
446 	 * current thread).  If it's not dirty (it may not be, due to
447 	 * an intervening save due to a context switch between the sti(),
448 	 * above and here, then it's safe to just use the stored values in
449 	 * the context save area to determine the cause of the fault.
450 	 */
451 	fp_save(fp);
452 
453 	/* clear exception flags in saved state, as if by fnclex */
454 #if defined(__amd64)
455 	fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
456 	fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw;
457 	fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
458 #else
459 		switch (fp_kind) {
460 		case __FP_SSE:
461 			fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
462 			fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw;
463 			fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
464 			break;
465 		default:
466 			fpsw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw;
467 			fpcw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw;
468 			fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw &= ~FPS_SW_EFLAGS;
469 			break;
470 		}
471 #endif
472 
473 	fp->fpu_regs.kfpu_status = fpsw;
474 
475 	if ((fpsw & FPS_ES) == 0)
476 		return (0);		/* No exception */
477 
478 	/*
479 	 * "and" the exception flags with the complement of the mask
480 	 * bits to determine which exception occurred
481 	 */
482 	return (fpe_sicode(fpsw & ~fpcw & 0x3f));
483 }
484 
485 /*
486  * Handle an SSE/SSE2 precise exception.
487  * Returns a non-zero sicode for error.
488  */
489 /*ARGSUSED*/
490 int
491 fpsimderrflt(struct regs *rp)
492 {
493 	uint32_t mxcsr, xmask;
494 	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
495 
496 	ASSERT(fp_kind == __FP_SSE);
497 
498 	/*
499 	 * NOTE: Interrupts are disabled during execution of this
500 	 * function.  They are enabled by the caller in trap.c.
501 	 */
502 
503 	/*
504 	 * The only way we could have gotten here if there is no FP unit
505 	 * is via a user executing an INT $19 instruction, so there is
506 	 * no fault in that case.
507 	 */
508 	if (!fpu_exists)
509 		return (0);
510 
511 	/*
512 	 * Do an unconditional save of the FP state.  If it's dirty (TS=0),
513 	 * it'll be saved into the fpu context area passed in (that of the
514 	 * current thread).  If it's not dirty, then it's safe to just use
515 	 * the stored values in the context save area to determine the
516 	 * cause of the fault.
517 	 */
518 	fp_save(fp); 		/* save the FPU state */
519 
520 	mxcsr = fp->fpu_regs.kfpu_u.kfpu_fx.fx_mxcsr;
521 
522 	fp->fpu_regs.kfpu_status = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
523 
524 	fp->fpu_regs.kfpu_xstatus = mxcsr;
525 
526 	/*
527 	 * compute the mask that determines which conditions can cause
528 	 * a #xm exception, and use this to clean the status bits so that
529 	 * we can identify the true cause of this one.
530 	 */
531 	xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS;
532 	return (fpe_simd_sicode((mxcsr & SSE_MXCSR_EFLAGS) & ~xmask));
533 }
534 
/*
 * In the unlikely event that someone is relying on this subcode being
 * FPE_FLTILL for denormalize exceptions, it can always be patched back
 * again to restore old behaviour.
 *
 * This is the si_code fpe_sicode() returns for the x87 denormal-operand
 * flag (FPS_DE).  fpe_simd_sicode() does not consult it and returns
 * FPE_FLTDEN directly.
 */
int fpe_fltden = FPE_FLTDEN;
541 
542 /*
543  * Map from the FPU status word to the FP exception si_code.
544  */
545 static int
546 fpe_sicode(uint_t sw)
547 {
548 	if (sw & FPS_IE)
549 		return (FPE_FLTINV);
550 	if (sw & FPS_ZE)
551 		return (FPE_FLTDIV);
552 	if (sw & FPS_DE)
553 		return (fpe_fltden);
554 	if (sw & FPS_OE)
555 		return (FPE_FLTOVF);
556 	if (sw & FPS_UE)
557 		return (FPE_FLTUND);
558 	if (sw & FPS_PE)
559 		return (FPE_FLTRES);
560 	return (FPE_FLTINV);	/* default si_code for other exceptions */
561 }
562 
563 /*
564  * Map from the SSE status word to the FP exception si_code.
565  */
566 static int
567 fpe_simd_sicode(uint_t sw)
568 {
569 	if (sw & SSE_IE)
570 		return (FPE_FLTINV);
571 	if (sw & SSE_ZE)
572 		return (FPE_FLTDIV);
573 	if (sw & SSE_DE)
574 		return (FPE_FLTDEN);
575 	if (sw & SSE_OE)
576 		return (FPE_FLTOVF);
577 	if (sw & SSE_UE)
578 		return (FPE_FLTUND);
579 	if (sw & SSE_PE)
580 		return (FPE_FLTRES);
581 	return (FPE_FLTINV);	/* default si_code for other exceptions */
582 }
583 
584 /*
585  * This routine is invoked as part of libc's __fpstart implementation
586  * via sysi86(2).
587  *
588  * It may be called -before- any context has been assigned in which case
589  * we try and avoid touching the hardware.  Or it may be invoked well
590  * after the context has been assigned and fiddled with, in which case
591  * just tweak it directly.
592  */
593 void
594 fpsetcw(uint16_t fcw, uint32_t mxcsr)
595 {
596 	struct fpu_ctx *fp = &curthread->t_lwp->lwp_pcb.pcb_fpu;
597 	struct fxsave_state *fx;
598 
599 	if (!fpu_exists || fp_kind == FP_NO)
600 		return;
601 
602 	if ((fp->fpu_flags & FPU_EN) == 0) {
603 		if (fcw == FPU_CW_INIT && mxcsr == SSE_MXCSR_INIT) {
604 			/*
605 			 * Common case.  Floating point unit not yet
606 			 * enabled, and kernel already intends to initialize
607 			 * the hardware the way the caller wants.
608 			 */
609 			return;
610 		}
611 		/*
612 		 * Hmm.  Userland wants a different default.
613 		 * Do a fake "first trap" to establish the context, then
614 		 * handle as if we already had a context before we came in.
615 		 */
616 		kpreempt_disable();
617 		fp_seed();
618 		kpreempt_enable();
619 	}
620 
621 	/*
622 	 * Ensure that the current hardware state is flushed back to the
623 	 * pcb, then modify that copy.  Next use of the fp will
624 	 * restore the context.
625 	 */
626 	fp_save(fp);
627 
628 #if defined(__amd64)
629 	fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
630 	fx->fx_fcw = fcw;
631 	fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
632 #else
633 	switch (fp_kind) {
634 	case __FP_SSE:
635 		fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
636 		fx->fx_fcw = fcw;
637 		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
638 		break;
639 	default:
640 		fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw = fcw;
641 		break;
642 	}
643 #endif
644 }
645