xref: /titanic_50/usr/src/uts/intel/ia32/os/fpu.c (revision bc0e91320069f0bcaee43e80a7ea686d9efa2d08)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
27 /*		All Rights Reserved				*/
28 
29 /*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
30 /*		All Rights Reserved				*/
31 
32 /*
33  * Copyright (c) 2009, Intel Corporation.
34  * All rights reserved.
35  */
36 
37 #include <sys/types.h>
38 #include <sys/param.h>
39 #include <sys/signal.h>
40 #include <sys/regset.h>
41 #include <sys/privregs.h>
42 #include <sys/psw.h>
43 #include <sys/trap.h>
44 #include <sys/fault.h>
45 #include <sys/systm.h>
46 #include <sys/user.h>
47 #include <sys/file.h>
48 #include <sys/proc.h>
49 #include <sys/pcb.h>
50 #include <sys/lwp.h>
51 #include <sys/cpuvar.h>
52 #include <sys/thread.h>
53 #include <sys/disp.h>
54 #include <sys/fp.h>
55 #include <sys/siginfo.h>
56 #include <sys/archsystm.h>
57 #include <sys/kmem.h>
58 #include <sys/debug.h>
59 #include <sys/x86_archext.h>
60 #include <sys/sysmacros.h>
61 #include <sys/cmn_err.h>
62 
63 /* Minimum xsave_state size in bytes: legacy fxsave layout + xsave header + ymm */
64 #define	AVX_XSAVE_SIZE		(512 + 64 + 256)
65 
66 /*CSTYLED*/
67 #pragma	align 16 (sse_initial)
68 
69 /*
70  * Initial kfpu state for SSE/SSE2 used by fpinit()
71  */
72 const struct fxsave_state sse_initial = {
73 	FPU_CW_INIT,	/* fx_fcw */
74 	0,		/* fx_fsw */
75 	0,		/* fx_fctw */
76 	0,		/* fx_fop */
77 #if defined(__amd64)
78 	0,		/* fx_rip */
79 	0,		/* fx_rdp */
80 #else
81 	0,		/* fx_eip */
82 	0,		/* fx_cs */
83 	0,		/* __fx_ign0 */
84 	0,		/* fx_dp */
85 	0,		/* fx_ds */
86 	0,		/* __fx_ign1 */
87 #endif /* __amd64 */
88 	SSE_MXCSR_INIT	/* fx_mxcsr */
89 	/* rest of structure is zero */
90 };
91 
92 /*CSTYLED*/
93 #pragma	align 64 (avx_initial)
94 
95 /*
96  * Initial kfpu state for AVX used by fpinit()
97  */
98 const struct xsave_state avx_initial = {
99 	/*
100 	 * The definition below needs to be identical with sse_initial
101 	 * defined above.
102 	 */
103 	{
104 		FPU_CW_INIT,	/* fx_fcw */
105 		0,		/* fx_fsw */
106 		0,		/* fx_fctw */
107 		0,		/* fx_fop */
108 #if defined(__amd64)
109 		0,		/* fx_rip */
110 		0,		/* fx_rdp */
111 #else
112 		0,		/* fx_eip */
113 		0,		/* fx_cs */
114 		0,		/* __fx_ign0 */
115 		0,		/* fx_dp */
116 		0,		/* fx_ds */
117 		0,		/* __fx_ign1 */
118 #endif /* __amd64 */
119 		SSE_MXCSR_INIT	/* fx_mxcsr */
120 		/* rest of structure is zero */
121 	},
122 	/*
123 	 * bit0 = 1 for XSTATE_BV to indicate that legacy fields are valid,
124 	 * and CPU should initialize XMM/YMM.
125 	 */
126 	1,
127 	{0, 0}	/* These 2 bytes must be zero */
128 	/* rest of structure is zero */
129 };
130 
131 /*
132  * Mask of settable MXCSR bits (possibly reset in fpu_probe).  fpsetcw()
133  * ANDs the caller's MXCSR value with it, to avoid the #gp exception
134  * caused by setting unsupported bits in the MXCSR register.
135  */
136 uint32_t sse_mxcsr_mask = SSE_MXCSR_MASK_DEFAULT;
137 
138 /*
139  * Initial kfpu state for x87 used by fpinit()
140  */
141 const struct fnsave_state x87_initial = {
142 	FPU_CW_INIT,	/* f_fcw */
143 	0,		/* __f_ign0 */
144 	0,		/* f_fsw */
145 	0,		/* __f_ign1 */
146 	0xffff,		/* f_ftw */
147 	/* rest of structure is zero */
148 };
149 
150 #if defined(__amd64)
151 /*
152  * Handler that fp_seed() and fp_new_lwp() pass to installctx().  It is
153  * patched to xsave_ctxt() if fpu_probe finds an XSAVE-capable chip.
154  */
155 void (*fpsave_ctxt)(void *) = fpxsave_ctxt;
156 #elif defined(__i386)
157 /*
158  * Same vector for i386, defaulting to fpnsave_ctxt().  It is patched to
159  * fpxsave_ctxt() if fpu_probe() finds an SSE-capable chip, and to
160  * xsave_ctxt if fpu_probe finds an XSAVE-capable chip.
161  */
162 void (*fpsave_ctxt)(void *) = fpnsave_ctxt;
163 #endif
164 
165 static int fpe_sicode(uint_t);		/* x87 status bits -> si_code */
166 static int fpe_simd_sicode(uint_t);	/* MXCSR status bits -> si_code */
167 
168 /*
169  * Copy the state of parent lwp's floating point context into the new lwp.
170  * Invoked for both fork() and lwp_create().
171  *
172  * Note that we inherit -only- the control state (e.g. exception masks,
173  * rounding, precision control, etc.); the FPU registers are otherwise
174  * reset to their initial state.
175  */
176 static void
fp_new_lwp(kthread_id_t t,kthread_id_t ct)177 fp_new_lwp(kthread_id_t t, kthread_id_t ct)
178 {
179 	struct fpu_ctx *fp;		/* parent fpu context */
180 	struct fpu_ctx *cfp;		/* new fpu context */
181 	struct fxsave_state *fx, *cfx;
182 #if defined(__i386)
183 	struct fnsave_state *fn, *cfn;
184 #endif
185 	struct xsave_state *cxs;
186 
187 	ASSERT(fp_kind != FP_NO);
188 
189 	fp = &t->t_lwp->lwp_pcb.pcb_fpu;
190 	cfp = &ct->t_lwp->lwp_pcb.pcb_fpu;
191 
192 	/*
193 	 * If the parent FPU state is still in the FPU hw then save it;
194 	 * conveniently, fp_save() already does this for us nicely.
195 	 */
196 	fp_save(fp);
197 
198 	cfp->fpu_flags = FPU_EN | FPU_VALID;
199 	cfp->fpu_regs.kfpu_status = 0;
200 	cfp->fpu_regs.kfpu_xstatus = 0;
201 
202 	switch (fp_save_mech) {
203 #if defined(__i386)
204 	case FP_FNSAVE:
205 		fn = &fp->fpu_regs.kfpu_u.kfpu_fn;
206 		cfn = &cfp->fpu_regs.kfpu_u.kfpu_fn;
207 		bcopy(&x87_initial, cfn, sizeof (*cfn));
208 		cfn->f_fcw = fn->f_fcw;
209 		break;
210 #endif
211 	case FP_FXSAVE:
212 		fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
213 		cfx = &cfp->fpu_regs.kfpu_u.kfpu_fx;
214 		bcopy(&sse_initial, cfx, sizeof (*cfx));
215 		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
216 		cfx->fx_fcw = fx->fx_fcw;
217 		break;
218 
219 	case FP_XSAVE:
220 		cfp->fpu_xsave_mask = fp->fpu_xsave_mask;
221 
222 		fx = &fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave;
223 		cxs = &cfp->fpu_regs.kfpu_u.kfpu_xs;
224 		cfx = &cxs->xs_fxsave;
225 
226 		bcopy(&avx_initial, cxs, sizeof (*cxs));
227 		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
228 		cfx->fx_fcw = fx->fx_fcw;
229 		cxs->xs_xstate_bv |= (get_xcr(XFEATURE_ENABLED_MASK) &
230 		    XFEATURE_FP_ALL);
231 		break;
232 	default:
233 		panic("Invalid fp_save_mech");
234 		/*NOTREACHED*/
235 	}
236 
237 	installctx(ct, cfp,
238 	    fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
239 	/*
240 	 * Now, when the new lwp starts running, it will take a trap
241 	 * that will be handled inline in the trap table to cause
242 	 * the appropriate f*rstor instruction to load the save area we
243 	 * constructed above directly into the hardware.
244 	 */
245 }
246 
247 /*
248  * Free any state associated with floating point context.
249  * Fp_free can be called in three cases:
250  * 1) from reaper -> thread_free -> ctxfree -> fp_free
251  *	fp context belongs to a thread on deathrow
252  *	nothing to do,  thread will never be resumed
253  *	thread calling ctxfree is reaper
254  *
255  * 2) from exec -> ctxfree -> fp_free
256  *	fp context belongs to the current thread
257  *	must disable fpu, thread calling ctxfree is curthread
258  *
259  * 3) from restorecontext -> setfpregs -> fp_free
260  *	we have a modified context in the memory (lwp->pcb_fpu)
261  *	disable fpu and release the fp context for the CPU
262  *
263  */
264 /*ARGSUSED*/
265 void
fp_free(struct fpu_ctx * fp,int isexec)266 fp_free(struct fpu_ctx *fp, int isexec)
267 {
268 	ASSERT(fp_kind != FP_NO);
269 
270 	if (fp->fpu_flags & FPU_VALID)
271 		return;
272 
273 	kpreempt_disable();
274 	/*
275 	 * We want to do fpsave rather than fpdisable so that we can
276 	 * keep the fpu_flags as FPU_VALID tracking the CR0_TS bit
277 	 */
278 	fp->fpu_flags |= FPU_VALID;
279 	/* If for current thread disable FP to track FPU_VALID */
280 	if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) {
281 		/* Clear errors if any to prevent frstor from complaining */
282 		(void) fperr_reset();
283 		if (fp_kind & __FP_SSE)
284 			(void) fpxerr_reset();
285 		fpdisable();
286 	}
287 	kpreempt_enable();
288 }
289 
290 /*
291  * Store the floating point state and disable the floating point unit.
292  */
293 void
fp_save(struct fpu_ctx * fp)294 fp_save(struct fpu_ctx *fp)
295 {
296 	ASSERT(fp_kind != FP_NO);
297 
298 	kpreempt_disable();
299 	if (!fp || fp->fpu_flags & FPU_VALID) {
300 		kpreempt_enable();
301 		return;
302 	}
303 	ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu);
304 
305 	switch (fp_save_mech) {
306 #if defined(__i386)
307 	case FP_FNSAVE:
308 		fpsave(&fp->fpu_regs.kfpu_u.kfpu_fn);
309 		break;
310 #endif
311 	case FP_FXSAVE:
312 		fpxsave(&fp->fpu_regs.kfpu_u.kfpu_fx);
313 		break;
314 
315 	case FP_XSAVE:
316 		xsave(&fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
317 		break;
318 	default:
319 		panic("Invalid fp_save_mech");
320 		/*NOTREACHED*/
321 	}
322 
323 	fp->fpu_flags |= FPU_VALID;
324 	kpreempt_enable();
325 }
326 
327 /*
328  * Restore the FPU context for the thread:
329  * The possibilities are:
330  *	1. No active FPU context: Load the new context into the FPU hw
331  *	   and enable the FPU.
332  */
333 void
fp_restore(struct fpu_ctx * fp)334 fp_restore(struct fpu_ctx *fp)
335 {
336 	switch (fp_save_mech) {
337 #if defined(__i386)
338 	case FP_FNSAVE:
339 		fprestore(&fp->fpu_regs.kfpu_u.kfpu_fn);
340 		break;
341 #endif
342 	case FP_FXSAVE:
343 		fpxrestore(&fp->fpu_regs.kfpu_u.kfpu_fx);
344 		break;
345 
346 	case FP_XSAVE:
347 		xrestore(&fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
348 		break;
349 	default:
350 		panic("Invalid fp_save_mech");
351 		/*NOTREACHED*/
352 	}
353 
354 	fp->fpu_flags &= ~FPU_VALID;
355 }
356 
357 
358 /*
359  * Seeds the initial state for the current thread.  The possibilities are:
360  *      1. Another process has modified the FPU state before we have done any
361  *         initialization: Load the FPU state from the LWP state.
362  *      2. The FPU state has not been externally modified:  Load a clean state.
363  */
364 static void
fp_seed(void)365 fp_seed(void)
366 {
367 	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
368 
369 	ASSERT(curthread->t_preempt >= 1);
370 	ASSERT((fp->fpu_flags & FPU_EN) == 0);
371 
372 	/*
373 	 * Always initialize a new context and initialize the hardware.
374 	 */
375 	if (fp_save_mech == FP_XSAVE) {
376 		fp->fpu_xsave_mask = get_xcr(XFEATURE_ENABLED_MASK) &
377 		    XFEATURE_FP_ALL;
378 	}
379 
380 	installctx(curthread, fp,
381 	    fpsave_ctxt, NULL, fp_new_lwp, fp_new_lwp, NULL, fp_free);
382 	fpinit();
383 
384 	/*
385 	 * If FPU_VALID is set, it means someone has modified registers via
386 	 * /proc.  In this case, restore the current lwp's state.
387 	 */
388 	if (fp->fpu_flags & FPU_VALID)
389 		fp_restore(fp);
390 
391 	ASSERT((fp->fpu_flags & FPU_VALID) == 0);
392 	fp->fpu_flags = FPU_EN;
393 }
394 
395 /*
396  * This routine is called from trap() when User thread takes No Extension
397  * Fault. The possibilities are:
398  *	1. User thread has executed a FP instruction for the first time.
399  *	   Save current FPU context if any. Initialize FPU, setup FPU
400  *	   context for the thread and enable FP hw.
401  *	2. Thread's pcb has a valid FPU state: Restore the FPU state and
402  *	   enable FP hw.
403  *
404  * Note that case #2 is inlined in the trap table.
405  */
406 int
fpnoextflt(struct regs * rp)407 fpnoextflt(struct regs *rp)
408 {
409 	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
410 
411 #if !defined(__lint)
412 	ASSERT(sizeof (struct fxsave_state) == 512 &&
413 	    sizeof (struct fnsave_state) == 108);
414 	ASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0);
415 
416 	ASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE);
417 
418 #if defined(__i386)
419 	ASSERT(sizeof (struct _fpu) == sizeof (struct __old_fpu));
420 #endif	/* __i386 */
421 #endif	/* !__lint */
422 
423 	/*
424 	 * save area MUST be 16-byte aligned, else will page fault
425 	 */
426 	ASSERT(((uintptr_t)(&fp->fpu_regs.kfpu_u.kfpu_fx) & 0xf) == 0);
427 
428 	kpreempt_disable();
429 	/*
430 	 * Now we can enable the interrupts.
431 	 * (NOTE: fp-no-coprocessor comes thru interrupt gate)
432 	 */
433 	sti();
434 
435 	if (!fpu_exists) { /* check for FPU hw exists */
436 		if (fp_kind == FP_NO) {
437 			uint32_t inst;
438 
439 			/*
440 			 * When the system has no floating point support,
441 			 * i.e. no FP hardware and no emulator, skip the
442 			 * two kinds of FP instruction that occur in
443 			 * fpstart.  Allows processes that do no real FP
444 			 * to run normally.
445 			 */
446 			if (fuword32((void *)rp->r_pc, &inst) != -1 &&
447 			    ((inst & 0xFFFF) == 0x7dd9 ||
448 			    (inst & 0xFFFF) == 0x6dd9)) {
449 				rp->r_pc += 3;
450 				kpreempt_enable();
451 				return (0);
452 			}
453 		}
454 
455 		/*
456 		 * If we have neither a processor extension nor
457 		 * an emulator, kill the process OR panic the kernel.
458 		 */
459 		kpreempt_enable();
460 		return (1); /* error */
461 	}
462 
463 #if !defined(__xpv)	/* XXPV	Is this ifdef needed now? */
464 	/*
465 	 * A paranoid cross-check: for the SSE case, ensure that %cr4 is
466 	 * configured to enable fully fledged (%xmm) fxsave/fxrestor on
467 	 * this CPU.  For the non-SSE case, ensure that it isn't.
468 	 */
469 	ASSERT(((fp_kind & __FP_SSE) &&
470 	    (getcr4() & CR4_OSFXSR) == CR4_OSFXSR) ||
471 	    (!(fp_kind & __FP_SSE) &&
472 	    (getcr4() & (CR4_OSXMMEXCPT|CR4_OSFXSR)) == 0));
473 #endif
474 
475 	if (fp->fpu_flags & FPU_EN) {
476 		/* case 2 */
477 		fp_restore(fp);
478 	} else {
479 		/* case 1 */
480 		fp_seed();
481 	}
482 	kpreempt_enable();
483 	return (0);
484 }
485 
486 
487 /*
488  * Handle a processor extension overrun fault
489  * Returns non zero for error.
490  *
491  * XXX	Shouldn't this just be abolished given that we're not supporting
492  *	anything prior to Pentium?
493  */
494 
495 /* ARGSUSED */
496 int
fpextovrflt(struct regs * rp)497 fpextovrflt(struct regs *rp)
498 {
499 #if !defined(__xpv)		/* XXPV	Do we need this ifdef either */
500 	ulong_t cur_cr0;
501 
502 	ASSERT(fp_kind != FP_NO);
503 
504 	cur_cr0 = getcr0();
505 	fpinit();		/* initialize the FPU hardware */
506 	setcr0(cur_cr0);
507 #endif
508 	sti();
509 	return (1); 		/* error, send SIGSEGV signal to the thread */
510 }
511 
512 /*
513  * Handle a processor extension error fault
514  * Returns non zero for error.
515  */
516 
517 /*ARGSUSED*/
518 int
fpexterrflt(struct regs * rp)519 fpexterrflt(struct regs *rp)
520 {
521 	uint32_t fpcw, fpsw;
522 	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
523 
524 	ASSERT(fp_kind != FP_NO);
525 
526 	/*
527 	 * Now we can enable the interrupts.
528 	 * (NOTE: x87 fp exceptions come thru interrupt gate)
529 	 */
530 	sti();
531 
532 	if (!fpu_exists)
533 		return (FPE_FLTINV);
534 
535 	/*
536 	 * Do an unconditional save of the FP state.  If it's dirty (TS=0),
537 	 * it'll be saved into the fpu context area passed in (that of the
538 	 * current thread).  If it's not dirty (it may not be, due to
539 	 * an intervening save due to a context switch between the sti(),
540 	 * above and here, then it's safe to just use the stored values in
541 	 * the context save area to determine the cause of the fault.
542 	 */
543 	fp_save(fp);
544 
545 	/* clear exception flags in saved state, as if by fnclex */
546 	switch (fp_save_mech) {
547 #if defined(__i386)
548 	case FP_FNSAVE:
549 		fpsw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw;
550 		fpcw = fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw;
551 		fp->fpu_regs.kfpu_u.kfpu_fn.f_fsw &= ~FPS_SW_EFLAGS;
552 		break;
553 #endif
554 
555 	case FP_FXSAVE:
556 		fpsw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
557 		fpcw = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fcw;
558 		fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw &= ~FPS_SW_EFLAGS;
559 		break;
560 
561 	case FP_XSAVE:
562 		fpsw = fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave.fx_fsw;
563 		fpcw = fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave.fx_fcw;
564 		fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave.fx_fsw &= ~FPS_SW_EFLAGS;
565 		/*
566 		 * Always set LEGACY_FP as it may have been cleared by XSAVE
567 		 * instruction
568 		 */
569 		fp->fpu_regs.kfpu_u.kfpu_xs.xs_xstate_bv |= XFEATURE_LEGACY_FP;
570 		break;
571 	default:
572 		panic("Invalid fp_save_mech");
573 		/*NOTREACHED*/
574 	}
575 
576 	fp->fpu_regs.kfpu_status = fpsw;
577 
578 	if ((fpsw & FPS_ES) == 0)
579 		return (0);		/* No exception */
580 
581 	/*
582 	 * "and" the exception flags with the complement of the mask
583 	 * bits to determine which exception occurred
584 	 */
585 	return (fpe_sicode(fpsw & ~fpcw & 0x3f));
586 }
587 
588 /*
589  * Handle an SSE/SSE2 precise exception.
590  * Returns a non-zero sicode for error.
591  */
592 /*ARGSUSED*/
593 int
fpsimderrflt(struct regs * rp)594 fpsimderrflt(struct regs *rp)
595 {
596 	uint32_t mxcsr, xmask;
597 	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
598 
599 	ASSERT(fp_kind & __FP_SSE);
600 
601 	/*
602 	 * NOTE: Interrupts are disabled during execution of this
603 	 * function.  They are enabled by the caller in trap.c.
604 	 */
605 
606 	/*
607 	 * The only way we could have gotten here if there is no FP unit
608 	 * is via a user executing an INT $19 instruction, so there is
609 	 * no fault in that case.
610 	 */
611 	if (!fpu_exists)
612 		return (0);
613 
614 	/*
615 	 * Do an unconditional save of the FP state.  If it's dirty (TS=0),
616 	 * it'll be saved into the fpu context area passed in (that of the
617 	 * current thread).  If it's not dirty, then it's safe to just use
618 	 * the stored values in the context save area to determine the
619 	 * cause of the fault.
620 	 */
621 	fp_save(fp); 		/* save the FPU state */
622 
623 	mxcsr = fp->fpu_regs.kfpu_u.kfpu_fx.fx_mxcsr;
624 
625 	fp->fpu_regs.kfpu_status = fp->fpu_regs.kfpu_u.kfpu_fx.fx_fsw;
626 
627 	fp->fpu_regs.kfpu_xstatus = mxcsr;
628 
629 	/*
630 	 * compute the mask that determines which conditions can cause
631 	 * a #xm exception, and use this to clean the status bits so that
632 	 * we can identify the true cause of this one.
633 	 */
634 	xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS;
635 	return (fpe_simd_sicode((mxcsr & SSE_MXCSR_EFLAGS) & ~xmask));
636 }
637 
638 /*
639  * si_code that fpe_sicode() returns for denormalize (FPS_DE) exceptions.
640  * Kept in a variable so that, if someone is relying on this subcode being
641  * FPE_FLTILL, it can be patched back to restore the old behaviour.
642  */
643 int fpe_fltden = FPE_FLTDEN;
644 
645 /*
646  * Map from the FPU status word to the FP exception si_code.
647  */
648 static int
fpe_sicode(uint_t sw)649 fpe_sicode(uint_t sw)
650 {
651 	if (sw & FPS_IE)
652 		return (FPE_FLTINV);
653 	if (sw & FPS_ZE)
654 		return (FPE_FLTDIV);
655 	if (sw & FPS_DE)
656 		return (fpe_fltden);
657 	if (sw & FPS_OE)
658 		return (FPE_FLTOVF);
659 	if (sw & FPS_UE)
660 		return (FPE_FLTUND);
661 	if (sw & FPS_PE)
662 		return (FPE_FLTRES);
663 	return (FPE_FLTINV);	/* default si_code for other exceptions */
664 }
665 
666 /*
667  * Map from the SSE status word to the FP exception si_code.
668  */
669 static int
fpe_simd_sicode(uint_t sw)670 fpe_simd_sicode(uint_t sw)
671 {
672 	if (sw & SSE_IE)
673 		return (FPE_FLTINV);
674 	if (sw & SSE_ZE)
675 		return (FPE_FLTDIV);
676 	if (sw & SSE_DE)
677 		return (FPE_FLTDEN);
678 	if (sw & SSE_OE)
679 		return (FPE_FLTOVF);
680 	if (sw & SSE_UE)
681 		return (FPE_FLTUND);
682 	if (sw & SSE_PE)
683 		return (FPE_FLTRES);
684 	return (FPE_FLTINV);	/* default si_code for other exceptions */
685 }
686 
687 /*
688  * This routine is invoked as part of libc's __fpstart implementation
689  * via sysi86(2).
690  *
691  * It may be called -before- any context has been assigned in which case
692  * we try and avoid touching the hardware.  Or it may be invoked well
693  * after the context has been assigned and fiddled with, in which case
694  * just tweak it directly.
695  */
696 void
fpsetcw(uint16_t fcw,uint32_t mxcsr)697 fpsetcw(uint16_t fcw, uint32_t mxcsr)
698 {
699 	struct fpu_ctx *fp = &curthread->t_lwp->lwp_pcb.pcb_fpu;
700 	struct fxsave_state *fx;
701 
702 	if (!fpu_exists || fp_kind == FP_NO)
703 		return;
704 
705 	if ((fp->fpu_flags & FPU_EN) == 0) {
706 		if (fcw == FPU_CW_INIT && mxcsr == SSE_MXCSR_INIT) {
707 			/*
708 			 * Common case.  Floating point unit not yet
709 			 * enabled, and kernel already intends to initialize
710 			 * the hardware the way the caller wants.
711 			 */
712 			return;
713 		}
714 		/*
715 		 * Hmm.  Userland wants a different default.
716 		 * Do a fake "first trap" to establish the context, then
717 		 * handle as if we already had a context before we came in.
718 		 */
719 		kpreempt_disable();
720 		fp_seed();
721 		kpreempt_enable();
722 	}
723 
724 	/*
725 	 * Ensure that the current hardware state is flushed back to the
726 	 * pcb, then modify that copy.  Next use of the fp will
727 	 * restore the context.
728 	 */
729 	fp_save(fp);
730 
731 	switch (fp_save_mech) {
732 #if defined(__i386)
733 	case FP_FNSAVE:
734 		fp->fpu_regs.kfpu_u.kfpu_fn.f_fcw = fcw;
735 		break;
736 #endif
737 	case FP_FXSAVE:
738 		fx = &fp->fpu_regs.kfpu_u.kfpu_fx;
739 		fx->fx_fcw = fcw;
740 		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
741 		break;
742 
743 	case FP_XSAVE:
744 		fx = &fp->fpu_regs.kfpu_u.kfpu_xs.xs_fxsave;
745 		fx->fx_fcw = fcw;
746 		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
747 		/*
748 		 * Always set LEGACY_FP as it may have been cleared by XSAVE
749 		 * instruction
750 		 */
751 		fp->fpu_regs.kfpu_u.kfpu_xs.xs_xstate_bv |= XFEATURE_LEGACY_FP;
752 		break;
753 	default:
754 		panic("Invalid fp_save_mech");
755 		/*NOTREACHED*/
756 	}
757 }
758