xref: /titanic_50/usr/src/uts/intel/ia32/os/fpu.c (revision 90f050286227cf4c4f8aa425555d04723d331d48)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
29 /*		All Rights Reserved				*/
30 
31 /*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
32 /*		All Rights Reserved				*/
33 
34 #pragma ident	"%Z%%M%	%I%	%E% SMI"
35 
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/signal.h>
39 #include <sys/regset.h>
40 #include <sys/privregs.h>
41 #include <sys/psw.h>
42 #include <sys/trap.h>
43 #include <sys/fault.h>
44 #include <sys/systm.h>
45 #include <sys/user.h>
46 #include <sys/file.h>
47 #include <sys/proc.h>
48 #include <sys/pcb.h>
49 #include <sys/lwp.h>
50 #include <sys/cpuvar.h>
51 #include <sys/thread.h>
52 #include <sys/disp.h>
53 #include <sys/fp.h>
54 #include <sys/siginfo.h>
55 #include <sys/archsystm.h>
56 #include <sys/kmem.h>
57 #include <sys/debug.h>
58 #include <sys/x86_archext.h>
59 #include <sys/sysmacros.h>
60 
/*CSTYLED*/
#pragma	align 16 (sse_initial)

/*
 * Initial kfpu state for SSE/SSE2 used by fpinit()
 *
 * The initializers below are positional, so the run of zeros between
 * fx_fop and fx_mxcsr has to match the member layout of
 * struct fxsave_state for the architecture being built; that is why the
 * list differs under __amd64.  Only the x87 control word and the MXCSR
 * get non-zero values; every member after fx_mxcsr is implicitly zero.
 *
 * Besides fpinit(), fp_new_lwp() copies this template into a new lwp's
 * save area before inheriting the parent's control state.
 *
 * The pragma above requests 16-byte alignment for this object.
 * NOTE(review): presumably required because the image is loaded
 * directly by an fxrstor-style instruction; confirm against fpinit().
 */
const struct fxsave_state sse_initial = {
	FPU_CW_INIT,	/* fx_fcw */
	0,		/* fx_fsw */
	0,		/* fx_fctw */
	0,		/* fx_fop */
#if defined(__amd64)
	0,		/* fx_rip */
	0,		/* fx_rdp */
#else
	0,		/* fx_eip */
	0,		/* fx_cs */
	0,		/* __fx_ign0 */
	0,		/* fx_dp */
	0,		/* fx_ds */
	0,		/* __fx_ign1 */
#endif /* __amd64 */
	SSE_MXCSR_INIT	/* fx_mxcsr */
	/* rest of structure is zero */
};
86 
/*
 * mxcsr_mask value (possibly reset in fpu_probe); used to avoid
 * the #gp exception caused by setting unsupported bits in the
 * MXCSR register.
 *
 * It starts out as SSE_MXCSR_MASK_DEFAULT.  fpsetcw() ANDs the
 * caller-supplied MXCSR value with this mask before storing it into
 * the saved fxsave image.
 */
uint32_t sse_mxcsr_mask = SSE_MXCSR_MASK_DEFAULT;
93 
/*
 * Initial kfpu state for x87 used by fpinit()
 *
 * The initializers are positional and cover only the first five members
 * of struct fnsave_state; everything after f_ftw is implicitly zero.
 * fp_new_lwp() also copies this template into a new lwp's save area on
 * non-SSE systems before inheriting the parent's control word.
 *
 * NOTE(review): f_ftw = 0xffff presumably tags every x87 register as
 * empty; confirm against the fnsave image format.
 */
const struct fnsave_state x87_initial = {
	FPU_CW_INIT,	/* f_fcw */
	0,		/* __f_ign0 */
	0,		/* f_fsw */
	0,		/* __f_ign1 */
	0xffff,		/* f_ftw */
	/* rest of structure is zero */
};
105 
/*
 * fpsave_begin is the routine that fp_new_lwp() and fp_seed() hand to
 * installctx() as the first context operation.
 *
 * On amd64 it is a compile-time alias for fpxsave_begin().
 */
#if defined(__amd64)
#define	fpsave_begin	fpxsave_begin
#elif defined(__i386)
/*
 * On i386 it is a function pointer that defaults to fpnsave_begin().
 * This vector is patched to fpxsave_begin() if we discover
 * we have an SSE-capable chip in fpu_probe().
 */
void (*fpsave_begin)(void *) = fpnsave_begin;
#endif
115 
/*
 * Forward declarations for the si_code mapping helpers.  They are
 * defined near the end of this file but are called earlier, from
 * fpexterrflt() and fpsimderrflt() respectively.
 */
static int fpe_sicode(uint_t);
static int fpe_simd_sicode(uint_t);
118 
119 /*
120  * Copy the state of parent lwp's floating point context into the new lwp.
121  * Invoked for both fork() and lwp_create().
122  *
123  * Note that we inherit -only- the control state (e.g. exception masks,
124  * rounding, precision control, etc.); the FPU registers are otherwise
125  * reset to their initial state.
126  */
127 static void
128 fp_new_lwp(kthread_id_t t, kthread_id_t ct)
129 {
130 	struct fpu_ctx *fp;		/* parent fpu context */
131 	struct fpu_ctx *cfp;		/* new fpu context */
132 	struct fxsave_state *fx, *cfx;
133 
134 	ASSERT(fp_kind != FP_NO);
135 
136 	fp = &t->t_lwp->lwp_pcb.pcb_fpu;
137 	cfp = &ct->t_lwp->lwp_pcb.pcb_fpu;
138 
139 	/*
140 	 * If the parent FPU state is still in the FPU hw then save it;
141 	 * conveniently, fp_save() already does this for us nicely.
142 	 */
143 	fp_save(fp);
144 
145 	cfp->fpu_flags = FPU_EN | FPU_VALID;
146 	cfp->fpu_regs.kfpu_status = 0;
147 	cfp->fpu_regs.kfpu_xstatus = 0;
148 
149 #if defined(__amd64)
150 	fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
151 	cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx;
152 	bcopy(&sse_initial, cfx, sizeof (*cfx));
153 	cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
154 	cfx->fx_fcw = fx->fx_fcw;
155 #else
156 	if (fp_kind == __FP_SSE) {
157 		fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
158 		cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx;
159 		bcopy(&sse_initial, cfx, sizeof (*cfx));
160 		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
161 		cfx->fx_fcw = fx->fx_fcw;
162 	} else {
163 		struct fnsave_state *fn = &fp->fpu_regs.kfpu_u.kfpu_fn;
164 		struct fnsave_state *cfn = &cfp->fpu_regs.kfpu_u.kfpu_fn;
165 
166 		bcopy(&x87_initial, cfn, sizeof (*cfn));
167 		cfn->f_fcw = fn->f_fcw;
168 	}
169 #endif
170 	installctx(ct, cfp,
171 	    fpsave_begin, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
172 	/*
173 	 * Now, when the new lwp starts running, it will take a trap
174 	 * that will be handled inline in the trap table to cause
175 	 * the appropriate f*rstor instruction to load the save area we
176 	 * constructed above directly into the hardware.
177 	 */
178 }
179 
180 /*
181  * Free any state associated with floating point context.
182  * Fp_free can be called in three cases:
183  * 1) from reaper -> thread_free -> ctxfree -> fp_free
184  *	fp context belongs to a thread on deathrow
185  *	nothing to do,  thread will never be resumed
186  *	thread calling ctxfree is reaper
187  *
188  * 2) from exec -> ctxfree -> fp_free
189  *	fp context belongs to the current thread
190  *	must disable fpu, thread calling ctxfree is curthread
191  *
192  * 3) from restorecontext -> setfpregs -> fp_free
193  *	we have a modified context in the memory (lwp->pcb_fpu)
194  *	disable fpu and release the fp context for the CPU
195  *
196  */
197 /*ARGSUSED*/
198 void
199 fp_free(struct fpu_ctx *fp, int isexec)
200 {
201 	ASSERT(fp_kind != FP_NO);
202 
203 	if (fp->fpu_flags & FPU_VALID)
204 		return;
205 
206 	kpreempt_disable();
207 	/*
208 	 * We want to do fpsave rather than fpdisable so that we can
209 	 * keep the fpu_flags as FPU_VALID tracking the CR0_TS bit
210 	 */
211 	fp->fpu_flags |= FPU_VALID;
212 	/* If for current thread disable FP to track FPU_VALID */
213 	if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) {
214 		/* Clear errors if any to prevent frstor from complaining */
215 		(void) fperr_reset();
216 		if (fp_kind == __FP_SSE)
217 			(void) fpxerr_reset();
218 		fpdisable();
219 	}
220 	kpreempt_enable();
221 }
222 
223 /*
224  * Store the floating point state and disable the floating point unit.
225  */
226 void
227 fp_save(struct fpu_ctx *fp)
228 {
229 	ASSERT(fp_kind != FP_NO);
230 
231 	kpreempt_disable();
232 	if (!fp || fp->fpu_flags & FPU_VALID) {
233 		kpreempt_enable();
234 		return;
235 	}
236 	ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu);
237 
238 #if defined(__amd64)
239 	fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx);
240 #else
241 	switch (fp_kind) {
242 	case __FP_SSE:
243 		fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx);
244 		break;
245 	default:
246 		fpsave(&fp->fpu_regs.kfpu_u.kfpu_fn);
247 		break;
248 	}
249 #endif
250 	fp->fpu_flags |= FPU_VALID;
251 	kpreempt_enable();
252 }
253 
254 /*
255  * Restore the FPU context for the thread:
256  * The possibilities are:
257  *	1. No active FPU context: Load the new context into the FPU hw
258  *	   and enable the FPU.
259  */
260 void
261 fp_restore(struct fpu_ctx *fp)
262 {
263 #if defined(__amd64)
264 	fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx);
265 #else
266 	/* case 2 */
267 	if (fp_kind == __FP_SSE)
268 		fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx);
269 	else
270 		fprestore(&fp->fpu_regs.kfpu_u.kfpu_fn);
271 #endif
272 	fp->fpu_flags &= ~FPU_VALID;
273 }
274 
275 
276 /*
277  * Seeds the initial state for the current thread.  The possibilities are:
278  *      1. Another process has modified the FPU state before we have done any
279  *         initialization: Load the FPU state from the LWP state.
280  *      2. The FPU state has not been externally modified:  Load a clean state.
281  */
282 static void
283 fp_seed(void)
284 {
285 	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
286 
287 	ASSERT(curthread->t_preempt >= 1);
288 	ASSERT((fp->fpu_flags & FPU_EN) == 0);
289 
290 	/*
291 	 * Always initialize a new context and initialize the hardware.
292 	 */
293 	installctx(curthread, fp,
294 	    fpsave_begin, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
295 	fpinit();
296 
297 	/*
298 	 * If FPU_VALID is set, it means someone has modified registers via
299 	 * /proc.  In this case, restore the current lwp's state.
300 	 */
301 	if (fp->fpu_flags & FPU_VALID)
302 		fp_restore(fp);
303 
304 	ASSERT((fp->fpu_flags & FPU_VALID) == 0);
305 	fp->fpu_flags = FPU_EN;
306 }
307 
308 /*
309  * This routine is called from trap() when User thread takes No Extension
310  * Fault. The possiblities are:
311  *	1. User thread has executed a FP instruction for the first time.
312  *	   Save current FPU context if any. Initialize FPU, setup FPU
313  *	   context for the thread and enable FP hw.
314  *	2. Thread's pcb has a valid FPU state: Restore the FPU state and
315  *	   enable FP hw.
316  *
317  * Note that case #2 is inlined in the trap table.
318  */
319 int
320 fpnoextflt(struct regs *rp)
321 {
322 	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
323 
324 #if !defined(__lint)
325 	ASSERT(sizeof (struct fxsave_state) == 512 &&
326 	    sizeof (struct fnsave_state) == 108);
327 	ASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0);
328 #if defined(__i386)
329 	ASSERT(sizeof (struct fpu) == sizeof (struct __old_fpu));
330 #endif	/* __i386 */
331 #endif	/* !__lint */
332 
333 	/*
334 	 * save area MUST be 16-byte aligned, else will page fault
335 	 */
336 	ASSERT(((uintptr_t)(&fp->fpu_regs.kfpu_u.kfpu_fx) & 0xf) == 0);
337 
338 	kpreempt_disable();
339 	/*
340 	 * Now we can enable the interrupts.
341 	 * (NOTE: fp-no-coprocessor comes thru interrupt gate)
342 	 */
343 	sti();
344 
345 	if (!fpu_exists) { /* check for FPU hw exists */
346 		if (fp_kind == FP_NO) {
347 			uint32_t inst;
348 
349 			/*
350 			 * When the system has no floating point support,
351 			 * i.e. no FP hardware and no emulator, skip the
352 			 * two kinds of FP instruction that occur in
353 			 * fpstart.  Allows processes that do no real FP
354 			 * to run normally.
355 			 */
356 			if (fuword32((void *)rp->r_pc, &inst) != -1 &&
357 			    ((inst & 0xFFFF) == 0x7dd9 ||
358 			    (inst & 0xFFFF) == 0x6dd9)) {
359 				rp->r_pc += 3;
360 				kpreempt_enable();
361 				return (0);
362 			}
363 		}
364 
365 		/*
366 		 * If we have neither a processor extension nor
367 		 * an emulator, kill the process OR panic the kernel.
368 		 */
369 		kpreempt_enable();
370 		return (1); /* error */
371 	}
372 
373 	/*
374 	 * A paranoid cross-check: for the SSE case, ensure that %cr4 is
375 	 * configured to enable fully fledged (%xmm) fxsave/fxrestor on
376 	 * this CPU.  For the non-SSE case, ensure that it isn't.
377 	 */
378 	ASSERT((fp_kind == __FP_SSE && (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) ||
379 	    (fp_kind != __FP_SSE &&
380 	    (getcr4() & (CR4_OSXMMEXCPT|CR4_OSFXSR)) == 0));
381 
382 	if (fp->fpu_flags & FPU_EN) {
383 		/* case 2 */
384 		fp_restore(fp);
385 	} else {
386 		/* case 1 */
387 		fp_seed();
388 	}
389 	kpreempt_enable();
390 	return (0);
391 }
392 
393 
394 /*
395  * Handle a processor extension overrun fault
396  * Returns non zero for error.
397  */
398 
399 /* ARGSUSED */
400 int
401 fpextovrflt(struct regs *rp)
402 {
403 	ulong_t cur_cr0;
404 
405 	ASSERT(fp_kind != FP_NO);
406 
407 	cur_cr0 = getcr0();
408 	fpinit();		/* initialize the FPU hardware */
409 	setcr0(cur_cr0);
410 	sti();
411 
412 	return (1); 		/* error, send SIGSEGV signal to the thread */
413 }
414 
415 /*
416  * Handle a processor extension error fault
417  * Returns non zero for error.
418  */
419 
420 /*ARGSUSED*/
421 int
422 fpexterrflt(struct regs *rp)
423 {
424 	uint32_t fpcwsw;
425 	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
426 
427 	ASSERT(fp_kind != FP_NO);
428 
429 	fpcwsw = fpgetcwsw();
430 	/*
431 	 * Now we can enable the interrupts.
432 	 * (NOTE: x87 fp exceptions come thru interrupt gate)
433 	 */
434 	sti();
435 
436 	if ((fpcwsw & FPS_ES) == 0)
437 		return (0);	/* No exception */
438 
439 	if (fpu_exists) {
440 		fp_save(fp);
441 		/* clear exception flags in saved state, as if by fnclex */
442 #if defined(__amd64)
443 		fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
444 #else
445 		switch (fp_kind) {
446 		case __FP_SSE:
447 			fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
448 			break;
449 		default:
450 			fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw &= ~FPS_SW_EFLAGS;
451 			break;
452 		}
453 #endif
454 	}
455 	fp->fpu_regs.kfpu_status = fpcwsw & 0xffff;
456 	/*
457 	 * "and" the exception flags with the complement of the mask
458 	 * bits to determine which exception occurred
459 	 */
460 	return (fpe_sicode(fpcwsw & ~(fpcwsw >> 16) & 0x3f));
461 }
462 
463 /*
464  * Handle an SSE/SSE2 precise exception.
465  * Returns a non-zero sicode for error.
466  */
467 /*ARGSUSED*/
468 int
469 fpsimderrflt(struct regs *rp)
470 {
471 	uint32_t mxcsr, xmask;
472 	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
473 
474 	ASSERT(fp_kind == __FP_SSE);
475 
476 	mxcsr = fpgetmxcsr();
477 	if (fpu_exists) {
478 		fp_save(fp); 		/* save the FPU state */
479 		fp->fpu_regs.kfpu_status = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
480 	} else {
481 		fp->fpu_regs.kfpu_status = fpgetcwsw() & 0xffff;
482 	}
483 	fp->fpu_regs.kfpu_xstatus = mxcsr;
484 
485 	/*
486 	 * compute the mask that determines which conditions can cause
487 	 * a #xm exception, and use this to clean the status bits so that
488 	 * we can identify the true cause of this one.
489 	 */
490 	xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS;
491 	return (fpe_simd_sicode((mxcsr & SSE_MXCSR_EFLAGS) & ~xmask));
492 }
493 
/*
 * si_code that fpe_sicode() returns for an x87 denormal-operand
 * exception (FPS_DE).  It is a variable rather than a constant so the
 * value can be changed on a running or installed kernel.
 *
 * In the unlikely event that someone is relying on this subcode being
 * FPE_FLTILL for denormalize exceptions, it can always be patched back
 * again to restore old behaviour.
 */
int fpe_fltden = FPE_FLTDEN;
500 
501 /*
502  * Map from the FPU status word to the FP exception si_code.
503  */
504 static int
505 fpe_sicode(uint_t sw)
506 {
507 	if (sw & FPS_IE)
508 		return (FPE_FLTINV);
509 	if (sw & FPS_ZE)
510 		return (FPE_FLTDIV);
511 	if (sw & FPS_DE)
512 		return (fpe_fltden);
513 	if (sw & FPS_OE)
514 		return (FPE_FLTOVF);
515 	if (sw & FPS_UE)
516 		return (FPE_FLTUND);
517 	if (sw & FPS_PE)
518 		return (FPE_FLTRES);
519 	return (FPE_FLTINV);	/* default si_code for other exceptions */
520 }
521 
522 /*
523  * Map from the SSE status word to the FP exception si_code.
524  */
525 static int
526 fpe_simd_sicode(uint_t sw)
527 {
528 	if (sw & SSE_IE)
529 		return (FPE_FLTINV);
530 	if (sw & SSE_ZE)
531 		return (FPE_FLTDIV);
532 	if (sw & SSE_DE)
533 		return (FPE_FLTDEN);
534 	if (sw & SSE_OE)
535 		return (FPE_FLTOVF);
536 	if (sw & SSE_UE)
537 		return (FPE_FLTUND);
538 	if (sw & SSE_PE)
539 		return (FPE_FLTRES);
540 	return (FPE_FLTINV);	/* default si_code for other exceptions */
541 }
542 
543 /*
544  * This routine is invoked as part of libc's __fpstart implementation
545  * via sysi86(2).
546  *
547  * It may be called -before- any context has been assigned in which case
548  * we try and avoid touching the hardware.  Or it may be invoked well
549  * after the context has been assigned and fiddled with, in which case
550  * just tweak it directly.
551  */
552 void
553 fpsetcw(uint16_t fcw, uint32_t mxcsr)
554 {
555 	struct fpu_ctx *fp = &curthread->t_lwp->lwp_pcb.pcb_fpu;
556 	struct fxsave_state *fx;
557 
558 	if (!fpu_exists || fp_kind == FP_NO)
559 		return;
560 
561 	if ((fp->fpu_flags & FPU_EN) == 0) {
562 		if (fcw == FPU_CW_INIT && mxcsr == SSE_MXCSR_INIT) {
563 			/*
564 			 * Common case.  Floating point unit not yet
565 			 * enabled, and kernel already intends to initialize
566 			 * the hardware the way the caller wants.
567 			 */
568 			return;
569 		}
570 		/*
571 		 * Hmm.  Userland wants a different default.
572 		 * Do a fake "first trap" to establish the context, then
573 		 * handle as if we already had a context before we came in.
574 		 */
575 		kpreempt_disable();
576 		fp_seed();
577 		kpreempt_enable();
578 	}
579 
580 	/*
581 	 * Ensure that the current hardware state is flushed back to the
582 	 * pcb, then modify that copy.  Next use of the fp will
583 	 * restore the context.
584 	 */
585 	fp_save(fp);
586 
587 #if defined(__amd64)
588 	fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
589 	fx->fx_fcw = fcw;
590 	fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
591 #else
592 	switch (fp_kind) {
593 	case __FP_SSE:
594 		fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
595 		fx->fx_fcw = fcw;
596 		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
597 		break;
598 	default:
599 		fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw = fcw;
600 		break;
601 	}
602 #endif
603 }
604