1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990 William Jolitz.
5 * Copyright (c) 1991 The Regents of the University of California.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/bus.h>
36 #include <sys/domainset.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/sysctl.h>
45 #include <sys/sysent.h>
46 #include <sys/tslog.h>
47 #include <machine/bus.h>
48 #include <sys/rman.h>
49 #include <sys/signalvar.h>
50 #include <vm/uma.h>
51
52 #include <machine/cputypes.h>
53 #include <machine/frame.h>
54 #include <machine/intr_machdep.h>
55 #include <machine/md_var.h>
56 #include <machine/pcb.h>
57 #include <machine/psl.h>
58 #include <machine/resource.h>
59 #include <machine/specialreg.h>
60 #include <machine/segments.h>
61 #include <machine/ucontext.h>
62 #include <x86/ifunc.h>
63
64 /*
65 * Floating point support.
66 */
67
68 #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw))
69 #define fnclex() __asm __volatile("fnclex")
70 #define fninit() __asm __volatile("fninit")
71 #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr)))
72 #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr)))
73 #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr)))
74 #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
75 #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr))
76 #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : "=m" (*(addr)))
77
78 static __inline void
79 xrstor32(char *addr, uint64_t mask)
80 {
81 uint32_t low, hi;
82
83 low = mask;
84 hi = mask >> 32;
85 __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi));
86 }
87
88 static __inline void
89 xrstor64(char *addr, uint64_t mask)
90 {
91 uint32_t low, hi;
92
93 low = mask;
94 hi = mask >> 32;
95 __asm __volatile("xrstor64 %0" : : "m" (*addr), "a" (low), "d" (hi));
96 }
97
98 static __inline void
99 xsave32(char *addr, uint64_t mask)
100 {
101 uint32_t low, hi;
102
103 low = mask;
104 hi = mask >> 32;
105 __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) :
106 "memory");
107 }
108
109 static __inline void
110 xsave64(char *addr, uint64_t mask)
111 {
112 uint32_t low, hi;
113
114 low = mask;
115 hi = mask >> 32;
116 __asm __volatile("xsave64 %0" : "=m" (*addr) : "a" (low), "d" (hi) :
117 "memory");
118 }
119
120 static __inline void
121 xsaveopt32(char *addr, uint64_t mask)
122 {
123 uint32_t low, hi;
124
125 low = mask;
126 hi = mask >> 32;
127 __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) :
128 "memory");
129 }
130
131 static __inline void
132 xsaveopt64(char *addr, uint64_t mask)
133 {
134 uint32_t low, hi;
135
136 low = mask;
137 hi = mask >> 32;
138 __asm __volatile("xsaveopt64 %0" : "=m" (*addr) : "a" (low), "d" (hi) :
139 "memory");
140 }
141
142 CTASSERT(sizeof(struct savefpu) == 512);
143 CTASSERT(sizeof(struct xstate_hdr) == 64);
144 CTASSERT(sizeof(struct savefpu_ymm) == 832);
145
146 /*
147 * This requirement is to make it easier for asm code to calculate
148 * the offset of the FPU save area from the pcb address.  The FPU
149 * save area must be 64-byte aligned.
150 */
151 CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0);
152
153 /*
154 * Ensure the copy of XCR0 saved in a core is contained in the padding
155 * area.
156 */
157 CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savefpu, sv_pad) &&
158 X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savefpu));
159
160 static void fpu_clean_state(void);
161
162 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
163 SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware");
164
165 int use_xsave; /* non-static for cpu_switch.S */
166 uint64_t xsave_mask; /* the same */
167 static uint64_t xsave_mask_supervisor;
168 static uint64_t xsave_extensions;
169 static uma_zone_t fpu_save_area_zone;
170 static struct savefpu *fpu_initialstate;
171
172 static struct xsave_area_elm_descr {
173 u_int offset;
174 u_int size;
175 u_int flags;
176 } *xsave_area_desc;
177
178 static void
179 fpusave_xsaveopt64(void *addr)
180 {
181 xsaveopt64((char *)addr, xsave_mask);
182 }
183
184 static void
185 fpusave_xsaveopt3264(void *addr)
186 {
187 if (SV_CURPROC_FLAG(SV_ILP32))
188 xsaveopt32((char *)addr, xsave_mask);
189 else
190 xsaveopt64((char *)addr, xsave_mask);
191 }
192
193 static void
194 fpusave_xsave64(void *addr)
195 {
196 xsave64((char *)addr, xsave_mask);
197 }
198
199 static void
200 fpusave_xsave3264(void *addr)
201 {
202 if (SV_CURPROC_FLAG(SV_ILP32))
203 xsave32((char *)addr, xsave_mask);
204 else
205 xsave64((char *)addr, xsave_mask);
206 }
207
208 static void
209 fpurestore_xrstor64(void *addr)
210 {
211 xrstor64((char *)addr, xsave_mask);
212 }
213
214 static void
215 fpurestore_xrstor3264(void *addr)
216 {
217 if (SV_CURPROC_FLAG(SV_ILP32))
218 xrstor32((char *)addr, xsave_mask);
219 else
220 xrstor64((char *)addr, xsave_mask);
221 }
222
223 static void
224 fpusave_fxsave(void *addr)
225 {
226
227 fxsave((char *)addr);
228 }
229
230 static void
231 fpurestore_fxrstor(void *addr)
232 {
233
234 fxrstor((char *)addr);
235 }
236
237 DEFINE_IFUNC(, void, fpusave, (void *))
238 {
239 u_int cp[4];
240
241 if (!use_xsave)
242 return (fpusave_fxsave);
243 cpuid_count(0xd, 0x1, cp);
244 if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) {
245 return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ?
246 fpusave_xsaveopt64 : fpusave_xsaveopt3264);
247 }
248 return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ?
249 fpusave_xsave64 : fpusave_xsave3264);
250 }
251
252 DEFINE_IFUNC(, void, fpurestore, (void *))
253 {
254 if (!use_xsave)
255 return (fpurestore_fxrstor);
256 return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ?
257 fpurestore_xrstor64 : fpurestore_xrstor3264);
258 }
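
/*
 * Illustration only (a sketch, not additional kernel code): once the
 * resolvers above have run on a CPU that advertises XSAVEOPT and
 * CPUID_STDEXT_NFPUSG, a call such as
 *
 *	fpusave(curpcb->pcb_save);
 *
 * behaves like xsaveopt64((char *)curpcb->pcb_save, xsave_mask), and
 * fpurestore() likewise resolves to xrstor64() with the same mask.
 */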
259
260 void
261 fpususpend(void *addr)
262 {
263 u_long cr0;
264
265 cr0 = rcr0();
266 fpu_enable();
267 fpusave(addr);
268 load_cr0(cr0);
269 }
270
271 void
272 fpuresume(void *addr)
273 {
274 u_long cr0;
275
276 cr0 = rcr0();
277 fpu_enable();
278 fninit();
279 if (use_xsave)
280 load_xcr(XCR0, xsave_mask);
281 fpurestore(addr);
282 load_cr0(cr0);
283 }
284
285 /*
286 * Enable XSAVE if supported and allowed by user.
287 * Calculate the xsave_mask.
288 */
289 static void
290 fpuinit_bsp1(void)
291 {
292 u_int cp[4];
293 uint64_t xsave_mask_user;
294 bool old_wp;
295
296 if (!use_xsave)
297 return;
298 cpuid_count(0xd, 0x0, cp);
299 xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
300 if ((cp[0] & xsave_mask) != xsave_mask)
301 panic("CPU0 does not support X87 or SSE: %x", cp[0]);
302 xsave_mask = ((uint64_t)cp[3] << 32) | cp[0];
303 xsave_mask_user = xsave_mask;
304 TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user);
305 xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
306 xsave_mask &= xsave_mask_user;
307 if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512)
308 xsave_mask &= ~XFEATURE_AVX512;
309 if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX)
310 xsave_mask &= ~XFEATURE_MPX;
311
312 cpuid_count(0xd, 0x1, cp);
313 if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) {
314 /*
315 * Patch the XSAVE instruction in the cpu_switch code
316 * to XSAVEOPT.  We assume that the XSAVE encoding used a
317 * REX byte, and set bit 4 of the r/m byte.
318 *
319 * It seems that some BIOSes give control to the OS
320 * with CR0.WP already set, making the kernel text
321 * read-only before cpu_startup().
322 */
323 old_wp = disable_wp();
324 ctx_switch_xsave32[3] |= 0x10;
325 ctx_switch_xsave[3] |= 0x10;
326 restore_wp(old_wp);
327 }
328 xsave_mask_supervisor = ((uint64_t)cp[3] << 32) | cp[2];
329 }
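
/*
 * For illustration: the computed xsave_mask can only be narrowed by the
 * hw.xsave_mask tunable fetched above.  A hypothetical /boot/loader.conf
 * entry such as
 *
 *	hw.xsave_mask="0x7"
 *
 * would limit saved state to x87, SSE and AVX even on CPUs advertising
 * more XSAVE features; x87 and SSE are always forced back on above.
 */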
330
331 /*
332 * Calculate the fpu save area size.
333 */
334 static void
335 fpuinit_bsp2(void)
336 {
337 u_int cp[4];
338
339 if (use_xsave) {
340 cpuid_count(0xd, 0x0, cp);
341 cpu_max_ext_state_size = cp[1];
342
343 /*
344 * Reload the cpu_feature2, since we enabled OSXSAVE.
345 */
346 do_cpuid(1, cp);
347 cpu_feature2 = cp[2];
348 } else
349 cpu_max_ext_state_size = sizeof(struct savefpu);
350 }
351
352 /*
353 * Initialize the floating point unit.
354 */
355 void
356 fpuinit(void)
357 {
358 register_t saveintr;
359 uint64_t cr4;
360 u_int mxcsr;
361 u_short control;
362
363 TSENTER();
364 if (IS_BSP())
365 fpuinit_bsp1();
366
367 if (use_xsave) {
368 cr4 = rcr4();
369
370 /*
371 * Revert enablement of PKRU if user disabled its
372 * saving on context switches by clearing the bit in
373 * the xsave mask. Also redundantly clear the bit in
374 * cpu_stdext_feature2 to prevent pmap from ever
375 * trying to set the page table bits.
376 */
377 if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0 &&
378 (xsave_mask & XFEATURE_ENABLED_PKRU) == 0) {
379 cr4 &= ~CR4_PKE;
380 cpu_stdext_feature2 &= ~CPUID_STDEXT2_PKU;
381 }
382
383 load_cr4(cr4 | CR4_XSAVE);
384 load_xcr(XCR0, xsave_mask);
385 }
386
387 /*
388 * XCR0 must be set up before the CPU can report the save area size.
389 */
390 if (IS_BSP())
391 fpuinit_bsp2();
392
393 /*
394 * It is too early for critical_enter() to work on an AP.
395 */
396 saveintr = intr_disable();
397 fpu_enable();
398 fninit();
399 control = __INITIAL_FPUCW__;
400 fldcw(control);
401 mxcsr = __INITIAL_MXCSR__;
402 ldmxcsr(mxcsr);
403 fpu_disable();
404 intr_restore(saveintr);
405 TSEXIT();
406 }
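
/*
 * For reference (values taken from the stock amd64 headers, quoted here
 * only for illustration): __INITIAL_FPUCW__ is 0x037f, i.e. all x87
 * exceptions masked, extended (64-bit) precision, round to nearest, and
 * __INITIAL_MXCSR__ is 0x1f80, i.e. all SSE exceptions masked with
 * round to nearest.
 */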
407
408 /*
409 * On the boot CPU we generate a clean state that is used to
410 * initialize the floating point unit when it is first used by a
411 * process.
412 */
413 static void
414 fpuinitstate(void *arg __unused)
415 {
416 uint64_t *xstate_bv;
417 register_t saveintr;
418 int cp[4], i, max_ext_n;
419
420 /* Do potentially blocking operations before disabling interrupts. */
421 fpu_save_area_zone = uma_zcreate("FPU_save_area",
422 cpu_max_ext_state_size, NULL, NULL, NULL, NULL,
423 XSAVE_AREA_ALIGN - 1, 0);
424 fpu_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO);
425 if (use_xsave) {
426 max_ext_n = flsl(xsave_mask | xsave_mask_supervisor);
427 xsave_area_desc = malloc(max_ext_n * sizeof(struct
428 xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
429 }
430
431 cpu_thread_alloc(&thread0);
432
433 saveintr = intr_disable();
434 fpu_enable();
435
436 fpusave_fxsave(fpu_initialstate);
437 if (fpu_initialstate->sv_env.en_mxcsr_mask)
438 cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask;
439 else
440 cpu_mxcsr_mask = 0xFFBF;
441
442 /*
443 * The fninit instruction does not modify the XMM or x87 (MM/ST)
444 * registers, so the fpusave call above dumped whatever garbage those
445 * registers held after reset into the saved initial state.  Clear
446 * the XMM and x87 register file images so that startup program
447 * state and signal handler XMM/x87 register content are
448 * predictable.
449 */
450 bzero(fpu_initialstate->sv_fp, sizeof(fpu_initialstate->sv_fp));
451 bzero(fpu_initialstate->sv_xmm, sizeof(fpu_initialstate->sv_xmm));
452
453 /*
454 * Create a table describing the layout of the CPU Extended
455 * Save Area. See Intel SDM rev. 075 Vol. 1 13.4.1 "Legacy
456 * Region of an XSAVE Area" for the source of offsets/sizes.
457 */
458 if (use_xsave) {
459 cpuid_count(0xd, 1, cp);
460 xsave_extensions = cp[0];
461
462 xstate_bv = (uint64_t *)((char *)(fpu_initialstate + 1) +
463 offsetof(struct xstate_hdr, xstate_bv));
464 *xstate_bv = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
465
466 /* x87 state */
467 xsave_area_desc[0].offset = 0;
468 xsave_area_desc[0].size = 160;
469 /* XMM */
470 xsave_area_desc[1].offset = 160;
471 xsave_area_desc[1].size = 416 - 160;
472
473 for (i = 2; i < max_ext_n; i++) {
474 cpuid_count(0xd, i, cp);
475 xsave_area_desc[i].size = cp[0];
476 xsave_area_desc[i].offset = cp[1];
477 xsave_area_desc[i].flags = cp[2];
478 }
479 }
480
481 fpu_disable();
482 intr_restore(saveintr);
483 }
484 /* EFIRT needs this to be initialized before we can enter our EFI environment */
485 SYSINIT(fpuinitstate, SI_SUB_CPU, SI_ORDER_ANY, fpuinitstate, NULL);
486
487 /*
488 * Free coprocessor (if we have it).
489 */
490 void
491 fpuexit(struct thread *td)
492 {
493
494 critical_enter();
495 if (curthread == PCPU_GET(fpcurthread)) {
496 fpu_enable();
497 fpusave(curpcb->pcb_save);
498 fpu_disable();
499 PCPU_SET(fpcurthread, NULL);
500 }
501 critical_exit();
502 }
503
504 int
505 fpuformat(void)
506 {
507
508 return (_MC_FPFMT_XMM);
509 }
510
511 /*
512 * The following mechanism is used to ensure that the FPE_... value
513 * that is passed as a trapcode to the signal handler of the user
514 * process does not have more than one bit set.
515 *
516 * Multiple bits may be set if the user process modifies the control
517 * word while a status word bit is already set. While this is a sign
518 * of bad coding, we have no choice but to narrow them down to one
519 * bit, since we must not send a trapcode that is not exactly one of
520 * the FPE_ macros.
521 *
522 * The mechanism has a static table with 128 entries. Each combination
523 * of the 7 FPU status word exception bits directly translates to a
524 * position in this table, where a single FPE_... value is stored.
525 * This FPE_... value stored there is considered the "most important"
526 * of the exception bits and will be sent as the signal code. The
527 * precedence of the bits is based upon Intel Document "Numerical
528 * Applications", Chapter "Special Computational Situations".
529 *
530 * The macro to choose one of these values does these steps: 1) Throw
531 * away status word bits that cannot be masked. 2) Throw away the bits
532 * currently masked in the control word, assuming the user isn't
533 * interested in them anymore. 3) Reinsert status word bit 7 (stack
534 * fault) if it is set, which cannot be masked but must be preserved.
535 * 4) Use the remaining bits to point into the trapcode table.
536 *
537 * The 6 maskable bits in order of their preference, as stated in the
538 * above referenced Intel manual:
539 * 1 Invalid operation (FP_X_INV)
540 * 1a Stack underflow
541 * 1b Stack overflow
542 * 1c Operand of unsupported format
543 * 1d SNaN operand.
544 * 2 QNaN operand (not an exception, irrelevant here)
545 * 3 Any other invalid-operation not mentioned above or zero divide
546 * (FP_X_INV, FP_X_DZ)
547 * 4 Denormal operand (FP_X_DNML)
548 * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL)
549 * 6 Inexact result (FP_X_IMP)
550 */
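
/*
 * Worked example (hypothetical register values): with all exceptions
 * unmasked in the control word (low 6 bits clear) and a status word of
 * 0x41 (invalid operation with the stack fault bit set), fputrap_x87()
 * below computes the index 0x41 & ((~0x0000 & 0x3f) | 0x40) = 0x41,
 * and fpetable[0x41] yields FPE_FLTSUB.
 */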
551 static char fpetable[128] = {
552 0,
553 FPE_FLTINV, /* 1 - INV */
554 FPE_FLTUND, /* 2 - DNML */
555 FPE_FLTINV, /* 3 - INV | DNML */
556 FPE_FLTDIV, /* 4 - DZ */
557 FPE_FLTINV, /* 5 - INV | DZ */
558 FPE_FLTDIV, /* 6 - DNML | DZ */
559 FPE_FLTINV, /* 7 - INV | DNML | DZ */
560 FPE_FLTOVF, /* 8 - OFL */
561 FPE_FLTINV, /* 9 - INV | OFL */
562 FPE_FLTUND, /* A - DNML | OFL */
563 FPE_FLTINV, /* B - INV | DNML | OFL */
564 FPE_FLTDIV, /* C - DZ | OFL */
565 FPE_FLTINV, /* D - INV | DZ | OFL */
566 FPE_FLTDIV, /* E - DNML | DZ | OFL */
567 FPE_FLTINV, /* F - INV | DNML | DZ | OFL */
568 FPE_FLTUND, /* 10 - UFL */
569 FPE_FLTINV, /* 11 - INV | UFL */
570 FPE_FLTUND, /* 12 - DNML | UFL */
571 FPE_FLTINV, /* 13 - INV | DNML | UFL */
572 FPE_FLTDIV, /* 14 - DZ | UFL */
573 FPE_FLTINV, /* 15 - INV | DZ | UFL */
574 FPE_FLTDIV, /* 16 - DNML | DZ | UFL */
575 FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */
576 FPE_FLTOVF, /* 18 - OFL | UFL */
577 FPE_FLTINV, /* 19 - INV | OFL | UFL */
578 FPE_FLTUND, /* 1A - DNML | OFL | UFL */
579 FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */
580 FPE_FLTDIV, /* 1C - DZ | OFL | UFL */
581 FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */
582 FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */
583 FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */
584 FPE_FLTRES, /* 20 - IMP */
585 FPE_FLTINV, /* 21 - INV | IMP */
586 FPE_FLTUND, /* 22 - DNML | IMP */
587 FPE_FLTINV, /* 23 - INV | DNML | IMP */
588 FPE_FLTDIV, /* 24 - DZ | IMP */
589 FPE_FLTINV, /* 25 - INV | DZ | IMP */
590 FPE_FLTDIV, /* 26 - DNML | DZ | IMP */
591 FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */
592 FPE_FLTOVF, /* 28 - OFL | IMP */
593 FPE_FLTINV, /* 29 - INV | OFL | IMP */
594 FPE_FLTUND, /* 2A - DNML | OFL | IMP */
595 FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */
596 FPE_FLTDIV, /* 2C - DZ | OFL | IMP */
597 FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */
598 FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */
599 FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */
600 FPE_FLTUND, /* 30 - UFL | IMP */
601 FPE_FLTINV, /* 31 - INV | UFL | IMP */
602 FPE_FLTUND, /* 32 - DNML | UFL | IMP */
603 FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */
604 FPE_FLTDIV, /* 34 - DZ | UFL | IMP */
605 FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */
606 FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */
607 FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */
608 FPE_FLTOVF, /* 38 - OFL | UFL | IMP */
609 FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */
610 FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */
611 FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */
612 FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */
613 FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */
614 FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */
615 FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */
616 FPE_FLTSUB, /* 40 - STK */
617 FPE_FLTSUB, /* 41 - INV | STK */
618 FPE_FLTUND, /* 42 - DNML | STK */
619 FPE_FLTSUB, /* 43 - INV | DNML | STK */
620 FPE_FLTDIV, /* 44 - DZ | STK */
621 FPE_FLTSUB, /* 45 - INV | DZ | STK */
622 FPE_FLTDIV, /* 46 - DNML | DZ | STK */
623 FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */
624 FPE_FLTOVF, /* 48 - OFL | STK */
625 FPE_FLTSUB, /* 49 - INV | OFL | STK */
626 FPE_FLTUND, /* 4A - DNML | OFL | STK */
627 FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */
628 FPE_FLTDIV, /* 4C - DZ | OFL | STK */
629 FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */
630 FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */
631 FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */
632 FPE_FLTUND, /* 50 - UFL | STK */
633 FPE_FLTSUB, /* 51 - INV | UFL | STK */
634 FPE_FLTUND, /* 52 - DNML | UFL | STK */
635 FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */
636 FPE_FLTDIV, /* 54 - DZ | UFL | STK */
637 FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */
638 FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */
639 FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */
640 FPE_FLTOVF, /* 58 - OFL | UFL | STK */
641 FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */
642 FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */
643 FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */
644 FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */
645 FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */
646 FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */
647 FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */
648 FPE_FLTRES, /* 60 - IMP | STK */
649 FPE_FLTSUB, /* 61 - INV | IMP | STK */
650 FPE_FLTUND, /* 62 - DNML | IMP | STK */
651 FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */
652 FPE_FLTDIV, /* 64 - DZ | IMP | STK */
653 FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */
654 FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */
655 FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */
656 FPE_FLTOVF, /* 68 - OFL | IMP | STK */
657 FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */
658 FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */
659 FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */
660 FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */
661 FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */
662 FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */
663 FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */
664 FPE_FLTUND, /* 70 - UFL | IMP | STK */
665 FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */
666 FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */
667 FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */
668 FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */
669 FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */
670 FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */
671 FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */
672 FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */
673 FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */
674 FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */
675 FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */
676 FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */
677 FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */
678 FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */
679 FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
680 };
681
682 /*
683 * Read the FP status and control words, then generate si_code value
684 * for SIGFPE. The error code chosen will be one of the
685 * FPE_... macros. It will be sent as the second argument to old
686 * BSD-style signal handlers and as "siginfo_t->si_code" (second
687 * argument) to SA_SIGINFO signal handlers.
688 *
689 * Some time ago, we cleared the x87 exceptions with FNCLEX there.
690 * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The
691 * usermode code that understands the FPU hardware well enough to enable
692 * the exceptions can also handle clearing the exception state in the
693 * handler. The only consequence of not clearing the exception is the
694 * rethrow of the SIGFPE on return from the signal handler and
695 * reexecution of the corresponding instruction.
696 *
697 * For XMM traps, the exceptions were never cleared.
698 */
699 int
700 fputrap_x87(void)
701 {
702 struct savefpu *pcb_save;
703 u_short control, status;
704
705 critical_enter();
706
707 /*
708 * Interrupt handling (for another interrupt) may have pushed the
709 * state to memory. Fetch the relevant parts of the state from
710 * wherever they are.
711 */
712 if (PCPU_GET(fpcurthread) != curthread) {
713 pcb_save = curpcb->pcb_save;
714 control = pcb_save->sv_env.en_cw;
715 status = pcb_save->sv_env.en_sw;
716 } else {
717 fnstcw(&control);
718 fnstsw(&status);
719 }
720
721 critical_exit();
722 return (fpetable[status & ((~control & 0x3f) | 0x40)]);
723 }
724
725 int
726 fputrap_sse(void)
727 {
728 u_int mxcsr;
729
730 critical_enter();
731 if (PCPU_GET(fpcurthread) != curthread)
732 mxcsr = curpcb->pcb_save->sv_env.en_mxcsr;
733 else
734 stmxcsr(&mxcsr);
735 critical_exit();
736 return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
737 }
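
/*
 * Worked example (hypothetical MXCSR value): with the divide-by-zero
 * exception unmasked (ZM, bit 9, clear) and ZE (bit 2) raised, e.g.
 * mxcsr = 0x1d84, the expression above evaluates to
 * (0x1d84 & (~0x1d84 >> 7)) & 0x3f = 0x04, and fpetable[0x04] is
 * FPE_FLTDIV.
 */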
738
739 static void
740 restore_fpu_curthread(struct thread *td)
741 {
742 struct pcb *pcb;
743
744 /*
745 * Record new context early in case frstor causes a trap.
746 */
747 PCPU_SET(fpcurthread, td);
748
749 fpu_enable();
750 fpu_clean_state();
751 pcb = td->td_pcb;
752
753 if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) {
754 /*
755 * This is the first time this thread has used the FPU or
756 * the PCB doesn't contain a clean FPU state. Explicitly
757 * load an initial state.
758 *
759 * We prefer to restore the state from the actual save
760 * area in PCB instead of directly loading from
761 * fpu_initialstate, to ignite the XSAVEOPT
762 * tracking engine.
763 */
764 bcopy(fpu_initialstate, pcb->pcb_save,
765 cpu_max_ext_state_size);
766 fpurestore(pcb->pcb_save);
767 if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
768 fldcw(pcb->pcb_initial_fpucw);
769 if (PCB_USER_FPU(pcb))
770 set_pcb_flags(pcb, PCB_FPUINITDONE |
771 PCB_USERFPUINITDONE);
772 else
773 set_pcb_flags(pcb, PCB_FPUINITDONE);
774 } else
775 fpurestore(pcb->pcb_save);
776 }
777
778 /*
779 * Device Not Available (DNA, #NM) exception handler.
780 *
781 * It would be better to switch FP context here (if curthread !=
782 * fpcurthread) and not necessarily for every context switch, but it
783 * is too hard to access foreign pcb's.
784 */
785 void
786 fpudna(void)
787 {
788 struct thread *td;
789
790 td = curthread;
791 /*
792 * This handler is entered with interrupts enabled, so context
793 * switches may occur before critical_enter() is executed. If
794 * a context switch occurs, then when we regain control, our
795 * state will have been completely restored. The CPU may
796 * change underneath us, but the only part of our context that
797 * lives in the CPU is CR0.TS and that will be "restored" by
798 * setting it on the new CPU.
799 */
800 critical_enter();
801
802 KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0,
803 ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)"));
804 if (__predict_false(PCPU_GET(fpcurthread) == td)) {
805 /*
806 * Some virtual machines seem to set %cr0.TS at
807 * arbitrary moments. Silently clear the TS bit
808 * regardless of the eager/lazy FPU context switch
809 * mode.
810 */
811 fpu_enable();
812 } else {
813 if (__predict_false(PCPU_GET(fpcurthread) != NULL)) {
814 panic(
815 "fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n",
816 PCPU_GET(fpcurthread),
817 PCPU_GET(fpcurthread)->td_tid, td, td->td_tid);
818 }
819 restore_fpu_curthread(td);
820 }
821 critical_exit();
822 }
823
824 void fpu_activate_sw(struct thread *td); /* Called from the context switch */
825 void
826 fpu_activate_sw(struct thread *td)
827 {
828
829 if ((td->td_pflags & TDP_KTHREAD) != 0 || !PCB_USER_FPU(td->td_pcb)) {
830 PCPU_SET(fpcurthread, NULL);
831 fpu_disable();
832 } else if (PCPU_GET(fpcurthread) != td) {
833 restore_fpu_curthread(td);
834 }
835 }
836
837 void
838 fpudrop(void)
839 {
840 struct thread *td;
841
842 td = PCPU_GET(fpcurthread);
843 KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread"));
844 CRITICAL_ASSERT(td);
845 PCPU_SET(fpcurthread, NULL);
846 clear_pcb_flags(td->td_pcb, PCB_FPUINITDONE);
847 fpu_disable();
848 }
849
850 /*
851 * Get the user state of the FPU into pcb->pcb_user_save without
852 * dropping ownership (if possible). It returns the FPU ownership
853 * status.
854 */
855 int
856 fpugetregs(struct thread *td)
857 {
858 struct pcb *pcb;
859 uint64_t *xstate_bv, bit;
860 char *sa;
861 struct savefpu *s;
862 uint32_t mxcsr, mxcsr_mask;
863 int max_ext_n, i, owned;
864 bool do_mxcsr;
865
866 pcb = td->td_pcb;
867 critical_enter();
868 if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
869 bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb),
870 cpu_max_ext_state_size);
871 get_pcb_user_save_pcb(pcb)->sv_env.en_cw =
872 pcb->pcb_initial_fpucw;
873 fpuuserinited(td);
874 critical_exit();
875 return (_MC_FPOWNED_PCB);
876 }
877 if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
878 fpusave(get_pcb_user_save_pcb(pcb));
879 owned = _MC_FPOWNED_FPU;
880 } else {
881 owned = _MC_FPOWNED_PCB;
882 }
883 if (use_xsave) {
884 /*
885 * Handle partially saved state.
886 */
887 sa = (char *)get_pcb_user_save_pcb(pcb);
888 xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) +
889 offsetof(struct xstate_hdr, xstate_bv));
890 max_ext_n = flsl(xsave_mask);
891 for (i = 0; i < max_ext_n; i++) {
892 bit = 1ULL << i;
893 if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0)
894 continue;
895 do_mxcsr = false;
896 if (i == 0 && (*xstate_bv & (XFEATURE_ENABLED_SSE |
897 XFEATURE_ENABLED_AVX)) != 0) {
898 /*
899 * x87 area was not saved by XSAVEOPT,
900 * but the XMM or AVX area was.  In that case we
901 * need to keep MXCSR from being overwritten with
902 * the default value.
903 */
904 s = (struct savefpu *)sa;
905 mxcsr = s->sv_env.en_mxcsr;
906 mxcsr_mask = s->sv_env.en_mxcsr_mask;
907 do_mxcsr = true;
908 }
909 bcopy((char *)fpu_initialstate +
910 xsave_area_desc[i].offset,
911 sa + xsave_area_desc[i].offset,
912 xsave_area_desc[i].size);
913 if (do_mxcsr) {
914 s->sv_env.en_mxcsr = mxcsr;
915 s->sv_env.en_mxcsr_mask = mxcsr_mask;
916 }
917 *xstate_bv |= bit;
918 }
919 }
920 critical_exit();
921 return (owned);
922 }
923
924 void
925 fpuuserinited(struct thread *td)
926 {
927 struct pcb *pcb;
928
929 CRITICAL_ASSERT(td);
930 pcb = td->td_pcb;
931 if (PCB_USER_FPU(pcb))
932 set_pcb_flags(pcb,
933 PCB_FPUINITDONE | PCB_USERFPUINITDONE);
934 else
935 set_pcb_flags(pcb, PCB_FPUINITDONE);
936 }
937
938 int
939 fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size)
940 {
941 struct xstate_hdr *hdr, *ehdr;
942 size_t len, max_len;
943 uint64_t bv;
944
945 /* XXXKIB should we clear all extended state in xstate_bv instead ? */
946 if (xfpustate == NULL)
947 return (0);
948 if (!use_xsave)
949 return (EOPNOTSUPP);
950
951 len = xfpustate_size;
952 if (len < sizeof(struct xstate_hdr))
953 return (EINVAL);
954 max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
955 if (len > max_len)
956 return (EINVAL);
957
958 ehdr = (struct xstate_hdr *)xfpustate;
959 bv = ehdr->xstate_bv;
960
961 /*
962 * Avoid #gp.
963 */
964 if (bv & ~xsave_mask)
965 return (EINVAL);
966
967 hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1);
968
969 hdr->xstate_bv = bv;
970 bcopy(xfpustate + sizeof(struct xstate_hdr),
971 (char *)(hdr + 1), len - sizeof(struct xstate_hdr));
972
973 return (0);
974 }
975
976 /*
977 * Set the state of the FPU.
978 */
979 int
980 fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate,
981 size_t xfpustate_size)
982 {
983 struct pcb *pcb;
984 int error;
985
986 addr->sv_env.en_mxcsr &= cpu_mxcsr_mask;
987 pcb = td->td_pcb;
988 error = 0;
989 critical_enter();
990 if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
991 error = fpusetxstate(td, xfpustate, xfpustate_size);
992 if (error == 0) {
993 bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
994 fpurestore(get_pcb_user_save_td(td));
995 set_pcb_flags(pcb, PCB_FPUINITDONE |
996 PCB_USERFPUINITDONE);
997 }
998 } else {
999 error = fpusetxstate(td, xfpustate, xfpustate_size);
1000 if (error == 0) {
1001 bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
1002 fpuuserinited(td);
1003 }
1004 }
1005 critical_exit();
1006 return (error);
1007 }
1008
1009 /*
1010 * On AuthenticAMD processors, the fxrstor instruction does not restore
1011 * the x87's stored last instruction pointer, last data pointer, and last
1012 * opcode values, except in the rare case in which the exception summary
1013 * (ES) bit in the x87 status word is set to 1.
1014 *
1015 * In order to avoid leaking this information across processes, we clean
1016 * these values by performing a dummy load before executing fxrstor().
1017 */
1018 static void
1019 fpu_clean_state(void)
1020 {
1021 static float dummy_variable = 0.0;
1022 u_short status;
1023
1024 /*
1025 * Clear the ES bit in the x87 status word if it is currently
1026 * set, in order to avoid causing a fault in the upcoming load.
1027 */
1028 fnstsw(&status);
1029 if (status & 0x80)
1030 fnclex();
1031
1032 /*
1033 * Load the dummy variable into the x87 stack. This mangles
1034 * the x87 stack, but we don't care since we're about to call
1035 * fxrstor() anyway.
1036 */
1037 __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable));
1038 }
1039
1040 /*
1041 * This really sucks. We want the acpi version only, but it requires
1042 * the isa_if.h file in order to get the definitions.
1043 */
1044 #include "opt_isa.h"
1045 #ifdef DEV_ISA
1046 #include <isa/isavar.h>
1047 /*
1048 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
1049 */
1050 static struct isa_pnp_id fpupnp_ids[] = {
1051 { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
1052 { 0 }
1053 };
1054
1055 static int
1056 fpupnp_probe(device_t dev)
1057 {
1058 int result;
1059
1060 result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids);
1061 if (result <= 0)
1062 device_quiet(dev);
1063 return (result);
1064 }
1065
1066 static int
1067 fpupnp_attach(device_t dev)
1068 {
1069
1070 return (0);
1071 }
1072
1073 static device_method_t fpupnp_methods[] = {
1074 /* Device interface */
1075 DEVMETHOD(device_probe, fpupnp_probe),
1076 DEVMETHOD(device_attach, fpupnp_attach),
1077 { 0, 0 }
1078 };
1079
1080 static driver_t fpupnp_driver = {
1081 "fpupnp",
1082 fpupnp_methods,
1083 1, /* no softc */
1084 };
1085
1086 DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, 0, 0);
1087 ISA_PNP_INFO(fpupnp_ids);
1088 #endif /* DEV_ISA */
1089
1090 static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
1091 "Kernel contexts for FPU state");
1092
1093 #define FPU_KERN_CTX_FPUINITDONE 0x01
1094 #define FPU_KERN_CTX_DUMMY 0x02 /* avoided save for the kern thread */
1095 #define FPU_KERN_CTX_INUSE 0x04
1096
1097 struct fpu_kern_ctx {
1098 struct savefpu *prev;
1099 uint32_t flags;
1100 char hwstate1[];
1101 };
1102
1103 static inline size_t __pure2
1104 fpu_kern_alloc_sz(u_int max_est)
1105 {
1106 return (sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + max_est);
1107 }
1108
1109 static inline int __pure2
1110 fpu_kern_malloc_flags(u_int fpflags)
1111 {
1112 return (((fpflags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO);
1113 }
1114
1115 struct fpu_kern_ctx *
1116 fpu_kern_alloc_ctx_domain(int domain, u_int flags)
1117 {
1118 return (malloc_domainset(fpu_kern_alloc_sz(cpu_max_ext_state_size),
1119 M_FPUKERN_CTX, DOMAINSET_PREF(domain),
1120 fpu_kern_malloc_flags(flags)));
1121 }
1122
1123 struct fpu_kern_ctx *
1124 fpu_kern_alloc_ctx(u_int flags)
1125 {
1126 return (malloc(fpu_kern_alloc_sz(cpu_max_ext_state_size),
1127 M_FPUKERN_CTX, fpu_kern_malloc_flags(flags)));
1128 }
1129
1130 void
1131 fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
1132 {
1133
1134 KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx"));
1135 /* XXXKIB clear the memory ? */
1136 free(ctx, M_FPUKERN_CTX);
1137 }
1138
1139 static struct savefpu *
1140 fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
1141 {
1142 vm_offset_t p;
1143
1144 p = (vm_offset_t)&ctx->hwstate1;
1145 p = roundup2(p, XSAVE_AREA_ALIGN);
1146 return ((struct savefpu *)p);
1147 }
1148
1149 void
1150 fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
1151 {
1152 struct pcb *pcb;
1153
1154 pcb = td->td_pcb;
1155 KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
1156 ("ctx is required when !FPU_KERN_NOCTX"));
1157 KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
1158 ("using inuse ctx"));
1159 KASSERT((pcb->pcb_flags & PCB_FPUNOSAVE) == 0,
1160 ("recursive fpu_kern_enter while in PCB_FPUNOSAVE state"));
1161
1162 if ((flags & FPU_KERN_NOCTX) != 0) {
1163 critical_enter();
1164 fpu_enable();
1165 if (curthread == PCPU_GET(fpcurthread)) {
1166 fpusave(curpcb->pcb_save);
1167 PCPU_SET(fpcurthread, NULL);
1168 } else {
1169 KASSERT(PCPU_GET(fpcurthread) == NULL,
1170 ("invalid fpcurthread"));
1171 }
1172
1173 /*
1174 * This breaks the XSAVEOPT tracker, but a thread in
1175 * the PCB_FPUNOSAVE state is never supposed to need
1176 * its FPU context saved at all.
1177 */
1178 fpurestore(fpu_initialstate);
1179 set_pcb_flags(pcb, PCB_KERNFPU | PCB_FPUNOSAVE |
1180 PCB_FPUINITDONE);
1181 return;
1182 }
1183 if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
1184 ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
1185 return;
1186 }
1187 critical_enter();
1188 KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save ==
1189 get_pcb_user_save_pcb(pcb), ("mangled pcb_save"));
1190 ctx->flags = FPU_KERN_CTX_INUSE;
1191 if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0)
1192 ctx->flags |= FPU_KERN_CTX_FPUINITDONE;
1193 fpuexit(td);
1194 ctx->prev = pcb->pcb_save;
1195 pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
1196 set_pcb_flags(pcb, PCB_KERNFPU);
1197 clear_pcb_flags(pcb, PCB_FPUINITDONE);
1198 critical_exit();
1199 }
1200
1201 int
1202 fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
1203 {
1204 struct pcb *pcb;
1205
1206 pcb = td->td_pcb;
1207
1208 if ((pcb->pcb_flags & PCB_FPUNOSAVE) != 0) {
1209 KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
1210 KASSERT(PCPU_GET(fpcurthread) == NULL,
1211 ("non-NULL fpcurthread for PCB_FPUNOSAVE"));
1212 CRITICAL_ASSERT(td);
1213
1214 clear_pcb_flags(pcb, PCB_FPUNOSAVE | PCB_FPUINITDONE);
1215 fpu_disable();
1216 } else {
1217 KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
1218 ("leaving not inuse ctx"));
1219 ctx->flags &= ~FPU_KERN_CTX_INUSE;
1220
1221 if (is_fpu_kern_thread(0) &&
1222 (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
1223 return (0);
1224 KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0,
1225 ("dummy ctx"));
1226 critical_enter();
1227 if (curthread == PCPU_GET(fpcurthread))
1228 fpudrop();
1229 pcb->pcb_save = ctx->prev;
1230 }
1231
1232 if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) {
1233 if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) {
1234 set_pcb_flags(pcb, PCB_FPUINITDONE);
1235 if ((pcb->pcb_flags & PCB_KERNFPU_THR) == 0)
1236 clear_pcb_flags(pcb, PCB_KERNFPU);
1237 } else if ((pcb->pcb_flags & PCB_KERNFPU_THR) == 0)
1238 clear_pcb_flags(pcb, PCB_FPUINITDONE | PCB_KERNFPU);
1239 } else {
1240 if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0)
1241 set_pcb_flags(pcb, PCB_FPUINITDONE);
1242 else
1243 clear_pcb_flags(pcb, PCB_FPUINITDONE);
1244 KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave"));
1245 }
1246 critical_exit();
1247 return (0);
1248 }
1249
1250 int
1251 fpu_kern_thread(u_int flags)
1252 {
1253
1254 KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
1255 ("Only kthread may use fpu_kern_thread"));
1256 KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb),
1257 ("mangled pcb_save"));
1258 KASSERT(PCB_USER_FPU(curpcb), ("recursive call"));
1259
1260 set_pcb_flags(curpcb, PCB_KERNFPU | PCB_KERNFPU_THR);
1261 return (0);
1262 }
1263
1264 int
1265 is_fpu_kern_thread(u_int flags)
1266 {
1267
1268 if ((curthread->td_pflags & TDP_KTHREAD) == 0)
1269 return (0);
1270 return ((curpcb->pcb_flags & PCB_KERNFPU_THR) != 0);
1271 }
1272
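/*
 * Minimal usage sketch (hypothetical consumer, not part of this file):
 * kernel code that wants to use FPU/SIMD registers, e.g. a software
 * crypto driver, would typically bracket the work roughly as follows,
 * where do_simd_work() stands in for the actual FPU-using code and
 * FPU_KERN_NORMAL is the usual "no special semantics" flag:
 *
 *	struct fpu_kern_ctx *ctx;
 *
 *	ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL);
 *	fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL);
 *	do_simd_work();
 *	fpu_kern_leave(curthread, ctx);
 *	fpu_kern_free_ctx(ctx);
 *
 * A kernel thread may call fpu_kern_thread() once, after which
 * fpu_kern_enter() with FPU_KERN_KTHR skips the context save, as
 * checked via is_fpu_kern_thread() above.
 */
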
1273 /*
1274 * FPU save area alloc/free/init utility routines
1275 */
1276 struct savefpu *
1277 fpu_save_area_alloc(void)
1278 {
1279
1280 return (uma_zalloc(fpu_save_area_zone, M_WAITOK));
1281 }
1282
1283 void
1284 fpu_save_area_free(struct savefpu *fsa)
1285 {
1286
1287 uma_zfree(fpu_save_area_zone, fsa);
1288 }
1289
1290 void
1291 fpu_save_area_reset(struct savefpu *fsa)
1292 {
1293
1294 bcopy(fpu_initialstate, fsa, cpu_max_ext_state_size);
1295 }
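
/*
 * Usage sketch (hypothetical consumer): code that keeps a per-object
 * FPU image outside of any pcb, e.g. a hypervisor holding guest FPU
 * state, would do roughly:
 *
 *	struct savefpu *fsa;
 *
 *	fsa = fpu_save_area_alloc();
 *	fpu_save_area_reset(fsa);
 *	(load or dump the image, e.g. via fpurestore()/fpusave())
 *	fpu_save_area_free(fsa);
 */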
1296
1297 static __inline void
1298 xsave_extfeature_check(uint64_t feature, bool supervisor)
1299 {
1300 #ifdef INVARIANTS
1301 uint64_t mask;
1302
1303 mask = supervisor ? xsave_mask_supervisor : xsave_mask;
1304 KASSERT((feature & (feature - 1)) == 0,
1305 ("%s: invalid XFEATURE 0x%lx", __func__, feature));
1306 KASSERT(ilog2(feature) <= ilog2(mask),
1307 ("%s: unsupported %s XFEATURE 0x%lx", __func__,
1308 supervisor ? "supervisor" : "user", feature));
1309 #endif
1310 }
1311
1312 static __inline void
1313 xsave_extstate_bv_check(uint64_t xstate_bv, bool supervisor)
1314 {
1315 #ifdef INVARIANTS
1316 uint64_t mask;
1317
1318 mask = supervisor ? xsave_mask_supervisor : xsave_mask;
1319 KASSERT(xstate_bv != 0 && ilog2(xstate_bv) <= ilog2(mask),
1320 ("%s: invalid XSTATE_BV 0x%lx", __func__, xstate_bv));
1321 #endif
1322 }
1323
1324 /*
1325 * Returns whether the XFEATURE 'feature' is supported as a user state
1326 * or supervisor state component.
1327 */
1328 bool
1329 xsave_extfeature_supported(uint64_t feature, bool supervisor)
1330 {
1331 int idx;
1332 uint64_t mask;
1333
1334 KASSERT(use_xsave, ("%s: XSAVE not supported", __func__));
1335 xsave_extfeature_check(feature, supervisor);
1336
1337 mask = supervisor ? xsave_mask_supervisor : xsave_mask;
1338 if ((mask & feature) == 0)
1339 return (false);
1340 idx = ilog2(feature);
1341 return (((xsave_area_desc[idx].flags & CPUID_EXTSTATE_SUPERVISOR) != 0) ==
1342 supervisor);
1343 }
1344
1345 /*
1346 * Returns whether the given XSAVE extension is supported.
1347 */
1348 bool
1349 xsave_extension_supported(uint64_t extension)
1350 {
1351 KASSERT(use_xsave, ("%s: XSAVE not supported", __func__));
1352
1353 return ((xsave_extensions & extension) != 0);
1354 }
1355
1356 /*
1357 * Returns offset for XFEATURE 'feature' given the requested feature bitmap
1358 * 'xstate_bv', and extended region format ('compact').
1359 */
1360 size_t
1361 xsave_area_offset(uint64_t xstate_bv, uint64_t feature,
1362 bool compact, bool supervisor)
1363 {
1364 int i, idx;
1365 size_t offs;
1366 struct xsave_area_elm_descr *xep;
1367
1368 KASSERT(use_xsave, ("%s: XSAVE not supported", __func__));
1369 xsave_extstate_bv_check(xstate_bv, supervisor);
1370 xsave_extfeature_check(feature, supervisor);
1371
1372 idx = ilog2(feature);
1373 if (!compact)
1374 return (xsave_area_desc[idx].offset);
1375 offs = sizeof(struct savefpu) + sizeof(struct xstate_hdr);
1376 xstate_bv &= ~(XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE);
1377 while ((i = ffs(xstate_bv) - 1) > 0 && i < idx) {
1378 xep = &xsave_area_desc[i];
1379 if ((xep->flags & CPUID_EXTSTATE_ALIGNED) != 0)
1380 offs = roundup2(offs, 64);
1381 offs += xep->size;
1382 xstate_bv &= ~((uint64_t)1 << i);
1383 }
1384
1385 return (offs);
1386 }
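
/*
 * Worked example: for xstate_bv = XFEATURE_ENABLED_X87 |
 * XFEATURE_ENABLED_SSE | XFEATURE_ENABLED_AVX and feature =
 * XFEATURE_ENABLED_AVX, the compact-format offset is
 * sizeof(struct savefpu) + sizeof(struct xstate_hdr) = 512 + 64 = 576,
 * since no other extended component precedes AVX in the bitmap; the
 * standard format instead uses the CPUID-reported offset from
 * xsave_area_desc[].
 */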
1387
1388 /*
1389 * Returns the XSAVE area size for the requested feature bitmap
1390 * 'xstate_bv' and extended region format ('compact').
1391 */
1392 size_t
1393 xsave_area_size(uint64_t xstate_bv, bool compact, bool supervisor)
1394 {
1395 int last_idx;
1396
1397 KASSERT(use_xsave, ("%s: XSAVE not supported", __func__));
1398 xsave_extstate_bv_check(xstate_bv, supervisor);
1399
1400 last_idx = ilog2(xstate_bv);
1401
1402 return (xsave_area_offset(xstate_bv, (uint64_t)1 << last_idx, compact, supervisor) +
1403 xsave_area_desc[last_idx].size);
1404 }
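
/*
 * Continuing the example above: xsave_area_size() for the same bitmap
 * returns 576 plus the size of the AVX component (256 bytes on current
 * CPUs), i.e. 832, matching the CTASSERT on sizeof(struct savefpu_ymm)
 * near the top of this file.
 */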
1405
1406 size_t
1407 xsave_area_hdr_offset(void)
1408 {
1409 return (sizeof(struct savefpu));
1410 }
1411