15b81b6b3SRodney W. Grimes /*- 25b81b6b3SRodney W. Grimes * Copyright (c) 1990 William Jolitz. 35b81b6b3SRodney W. Grimes * Copyright (c) 1991 The Regents of the University of California. 45b81b6b3SRodney W. Grimes * All rights reserved. 55b81b6b3SRodney W. Grimes * 65b81b6b3SRodney W. Grimes * Redistribution and use in source and binary forms, with or without 75b81b6b3SRodney W. Grimes * modification, are permitted provided that the following conditions 85b81b6b3SRodney W. Grimes * are met: 95b81b6b3SRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 105b81b6b3SRodney W. Grimes * notice, this list of conditions and the following disclaimer. 115b81b6b3SRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 125b81b6b3SRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 135b81b6b3SRodney W. Grimes * documentation and/or other materials provided with the distribution. 14fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 155b81b6b3SRodney W. Grimes * may be used to endorse or promote products derived from this software 165b81b6b3SRodney W. Grimes * without specific prior written permission. 175b81b6b3SRodney W. Grimes * 185b81b6b3SRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 195b81b6b3SRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 205b81b6b3SRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 215b81b6b3SRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 225b81b6b3SRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 235b81b6b3SRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 245b81b6b3SRodney W. 
Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 255b81b6b3SRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 265b81b6b3SRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 275b81b6b3SRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 285b81b6b3SRodney W. Grimes * SUCH DAMAGE. 295b81b6b3SRodney W. Grimes * 3021616ec3SPeter Wemm * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 315b81b6b3SRodney W. Grimes */ 325b81b6b3SRodney W. Grimes 3356ae44c5SDavid E. O'Brien #include <sys/cdefs.h> 3456ae44c5SDavid E. O'Brien __FBSDID("$FreeBSD$"); 3556ae44c5SDavid E. O'Brien 36f540b106SGarrett Wollman #include <sys/param.h> 37f540b106SGarrett Wollman #include <sys/systm.h> 386182fdbdSPeter Wemm #include <sys/bus.h> 393a34a5c3SPoul-Henning Kamp #include <sys/kernel.h> 40fb919e4dSMark Murray #include <sys/lock.h> 41cd59d49dSBruce Evans #include <sys/malloc.h> 426182fdbdSPeter Wemm #include <sys/module.h> 43c1ef8aacSJake Burkholder #include <sys/mutex.h> 44fb919e4dSMark Murray #include <sys/mutex.h> 45fb919e4dSMark Murray #include <sys/proc.h> 46fb919e4dSMark Murray #include <sys/sysctl.h> 476182fdbdSPeter Wemm #include <machine/bus.h> 486182fdbdSPeter Wemm #include <sys/rman.h> 49663f1485SBruce Evans #include <sys/signalvar.h> 502741efecSPeter Grehan #include <vm/uma.h> 512f86936aSGarrett Wollman 527f47cf2fSBruce Evans #include <machine/cputypes.h> 537f47cf2fSBruce Evans #include <machine/frame.h> 540d2a2989SPeter Wemm #include <machine/intr_machdep.h> 55c673fe98SBruce Evans #include <machine/md_var.h> 565400ed3bSPeter Wemm #include <machine/pcb.h> 577f47cf2fSBruce Evans #include <machine/psl.h> 586182fdbdSPeter Wemm #include <machine/resource.h> 59f540b106SGarrett Wollman #include <machine/specialreg.h> 607f47cf2fSBruce Evans #include <machine/segments.h> 6130abe507SJonathan Mini #include <machine/ucontext.h> 622f86936aSGarrett Wollman 
635b81b6b3SRodney W. Grimes /* 64bf2f09eeSPeter Wemm * Floating point support. 655b81b6b3SRodney W. Grimes */ 665b81b6b3SRodney W. Grimes 67a5f50ef9SJoerg Wunsch #if defined(__GNUCLIKE_ASM) && !defined(lint) 685b81b6b3SRodney W. Grimes 6917275403SJung-uk Kim #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) 7030402401SJung-uk Kim #define fnclex() __asm __volatile("fnclex") 7130402401SJung-uk Kim #define fninit() __asm __volatile("fninit") 721d37f051SBruce Evans #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) 732e50fa36SJung-uk Kim #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) 7430402401SJung-uk Kim #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) 759d146ac5SPeter Wemm #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) 7607c86dcfSJung-uk Kim #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) 77a81f9fedSKonstantin Belousov #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) 785b81b6b3SRodney W. 
Grimes 7994818d19SKonstantin Belousov static __inline void 8094818d19SKonstantin Belousov xrstor(char *addr, uint64_t mask) 8194818d19SKonstantin Belousov { 8294818d19SKonstantin Belousov uint32_t low, hi; 8394818d19SKonstantin Belousov 8494818d19SKonstantin Belousov low = mask; 8594818d19SKonstantin Belousov hi = mask >> 32; 867574a595SJohn Baldwin __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); 8794818d19SKonstantin Belousov } 8894818d19SKonstantin Belousov 8994818d19SKonstantin Belousov static __inline void 9094818d19SKonstantin Belousov xsave(char *addr, uint64_t mask) 9194818d19SKonstantin Belousov { 9294818d19SKonstantin Belousov uint32_t low, hi; 9394818d19SKonstantin Belousov 9494818d19SKonstantin Belousov low = mask; 9594818d19SKonstantin Belousov hi = mask >> 32; 967574a595SJohn Baldwin __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : 977574a595SJohn Baldwin "memory"); 9894818d19SKonstantin Belousov } 9994818d19SKonstantin Belousov 100cf4e1c46SPeter Wemm #else /* !(__GNUCLIKE_ASM && !lint) */ 1015b81b6b3SRodney W. Grimes 10217275403SJung-uk Kim void fldcw(u_short cw); 10389c9a483SAlfred Perlstein void fnclex(void); 10489c9a483SAlfred Perlstein void fninit(void); 10589c9a483SAlfred Perlstein void fnstcw(caddr_t addr); 10689c9a483SAlfred Perlstein void fnstsw(caddr_t addr); 10789c9a483SAlfred Perlstein void fxsave(caddr_t addr); 10889c9a483SAlfred Perlstein void fxrstor(caddr_t addr); 10907c86dcfSJung-uk Kim void ldmxcsr(u_int csr); 110a42fa0afSKonstantin Belousov void stmxcsr(u_int *csr); 11194818d19SKonstantin Belousov void xrstor(char *addr, uint64_t mask); 11294818d19SKonstantin Belousov void xsave(char *addr, uint64_t mask); 1135b81b6b3SRodney W. Grimes 114cf4e1c46SPeter Wemm #endif /* __GNUCLIKE_ASM && !lint */ 1155b81b6b3SRodney W. 
Grimes 116d706ec29SJohn Baldwin #define start_emulating() load_cr0(rcr0() | CR0_TS) 117d706ec29SJohn Baldwin #define stop_emulating() clts() 118d706ec29SJohn Baldwin 1198c6f8f3dSKonstantin Belousov CTASSERT(sizeof(struct savefpu) == 512); 1208c6f8f3dSKonstantin Belousov CTASSERT(sizeof(struct xstate_hdr) == 64); 1218c6f8f3dSKonstantin Belousov CTASSERT(sizeof(struct savefpu_ymm) == 832); 1228c6f8f3dSKonstantin Belousov 1238c6f8f3dSKonstantin Belousov /* 1248c6f8f3dSKonstantin Belousov * This requirement is to make it easier for asm code to calculate 1258c6f8f3dSKonstantin Belousov * offset of the fpu save area from the pcb address. FPU save area 126b74a2290SKonstantin Belousov * must be 64-byte aligned. 1278c6f8f3dSKonstantin Belousov */ 1288c6f8f3dSKonstantin Belousov CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); 1295b81b6b3SRodney W. Grimes 130180e57e5SJohn Baldwin /* 131180e57e5SJohn Baldwin * Ensure the copy of XCR0 saved in a core is contained in the padding 132180e57e5SJohn Baldwin * area. 
133180e57e5SJohn Baldwin */ 134180e57e5SJohn Baldwin CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savefpu, sv_pad) && 135180e57e5SJohn Baldwin X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savefpu)); 136180e57e5SJohn Baldwin 1372652af56SColin Percival static void fpu_clean_state(void); 1382652af56SColin Percival 1390b7dc0a7SJohn Baldwin SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, 140f0188618SHans Petter Selasky SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware"); 1413a34a5c3SPoul-Henning Kamp 1428c6f8f3dSKonstantin Belousov int use_xsave; /* non-static for cpu_switch.S */ 1438c6f8f3dSKonstantin Belousov uint64_t xsave_mask; /* the same */ 1442741efecSPeter Grehan static uma_zone_t fpu_save_area_zone; 1458c6f8f3dSKonstantin Belousov static struct savefpu *fpu_initialstate; 1468c6f8f3dSKonstantin Belousov 147333d0c60SKonstantin Belousov struct xsave_area_elm_descr { 148333d0c60SKonstantin Belousov u_int offset; 149333d0c60SKonstantin Belousov u_int size; 150333d0c60SKonstantin Belousov } *xsave_area_desc; 151333d0c60SKonstantin Belousov 1528c6f8f3dSKonstantin Belousov void 1538c6f8f3dSKonstantin Belousov fpusave(void *addr) 1548c6f8f3dSKonstantin Belousov { 1558c6f8f3dSKonstantin Belousov 1568c6f8f3dSKonstantin Belousov if (use_xsave) 1578c6f8f3dSKonstantin Belousov xsave((char *)addr, xsave_mask); 1588c6f8f3dSKonstantin Belousov else 1598c6f8f3dSKonstantin Belousov fxsave((char *)addr); 1608c6f8f3dSKonstantin Belousov } 1618c6f8f3dSKonstantin Belousov 1622741efecSPeter Grehan void 1638c6f8f3dSKonstantin Belousov fpurestore(void *addr) 1648c6f8f3dSKonstantin Belousov { 1658c6f8f3dSKonstantin Belousov 1668c6f8f3dSKonstantin Belousov if (use_xsave) 1678c6f8f3dSKonstantin Belousov xrstor((char *)addr, xsave_mask); 1688c6f8f3dSKonstantin Belousov else 1698c6f8f3dSKonstantin Belousov fxrstor((char *)addr); 1708c6f8f3dSKonstantin Belousov } 1713902c3efSSteve Passe 1721d22d877SJung-uk Kim void 1731d22d877SJung-uk Kim 
fpususpend(void *addr) 1741d22d877SJung-uk Kim { 1751d22d877SJung-uk Kim u_long cr0; 1761d22d877SJung-uk Kim 1771d22d877SJung-uk Kim cr0 = rcr0(); 1781d22d877SJung-uk Kim stop_emulating(); 1791d22d877SJung-uk Kim fpusave(addr); 1801d22d877SJung-uk Kim load_cr0(cr0); 1811d22d877SJung-uk Kim } 1821d22d877SJung-uk Kim 183b1d735baSJohn Baldwin void 184b1d735baSJohn Baldwin fpuresume(void *addr) 185b1d735baSJohn Baldwin { 186b1d735baSJohn Baldwin u_long cr0; 187b1d735baSJohn Baldwin 188b1d735baSJohn Baldwin cr0 = rcr0(); 189b1d735baSJohn Baldwin stop_emulating(); 190b1d735baSJohn Baldwin fninit(); 191b1d735baSJohn Baldwin if (use_xsave) 192b1d735baSJohn Baldwin load_xcr(XCR0, xsave_mask); 193b1d735baSJohn Baldwin fpurestore(addr); 194b1d735baSJohn Baldwin load_cr0(cr0); 195b1d735baSJohn Baldwin } 196b1d735baSJohn Baldwin 1975b81b6b3SRodney W. Grimes /* 1988c6f8f3dSKonstantin Belousov * Enable XSAVE if supported and allowed by user. 1998c6f8f3dSKonstantin Belousov * Calculate the xsave_mask. 2008c6f8f3dSKonstantin Belousov */ 2018c6f8f3dSKonstantin Belousov static void 2028c6f8f3dSKonstantin Belousov fpuinit_bsp1(void) 2038c6f8f3dSKonstantin Belousov { 2048c6f8f3dSKonstantin Belousov u_int cp[4]; 2058c6f8f3dSKonstantin Belousov uint64_t xsave_mask_user; 2068c6f8f3dSKonstantin Belousov 2078c6f8f3dSKonstantin Belousov if ((cpu_feature2 & CPUID2_XSAVE) != 0) { 2088c6f8f3dSKonstantin Belousov use_xsave = 1; 2098c6f8f3dSKonstantin Belousov TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); 2108c6f8f3dSKonstantin Belousov } 2118c6f8f3dSKonstantin Belousov if (!use_xsave) 2128c6f8f3dSKonstantin Belousov return; 2138c6f8f3dSKonstantin Belousov 2148c6f8f3dSKonstantin Belousov cpuid_count(0xd, 0x0, cp); 2158c6f8f3dSKonstantin Belousov xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; 2168c6f8f3dSKonstantin Belousov if ((cp[0] & xsave_mask) != xsave_mask) 2178c6f8f3dSKonstantin Belousov panic("CPU0 does not support X87 or SSE: %x", cp[0]); 2188c6f8f3dSKonstantin Belousov 
xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; 2198c6f8f3dSKonstantin Belousov xsave_mask_user = xsave_mask; 2208c6f8f3dSKonstantin Belousov TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); 2218c6f8f3dSKonstantin Belousov xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; 2228c6f8f3dSKonstantin Belousov xsave_mask &= xsave_mask_user; 2230eb7ae8dSJohn Baldwin if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) 2240eb7ae8dSJohn Baldwin xsave_mask &= ~XFEATURE_AVX512; 2250eb7ae8dSJohn Baldwin if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) 2260eb7ae8dSJohn Baldwin xsave_mask &= ~XFEATURE_MPX; 227333d0c60SKonstantin Belousov 228333d0c60SKonstantin Belousov cpuid_count(0xd, 0x1, cp); 229333d0c60SKonstantin Belousov if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) { 230333d0c60SKonstantin Belousov /* 231333d0c60SKonstantin Belousov * Patch the XSAVE instruction in the cpu_switch code 232333d0c60SKonstantin Belousov * to XSAVEOPT. We assume that XSAVE encoding used 233333d0c60SKonstantin Belousov * REX byte, and set the bit 4 of the r/m byte. 234333d0c60SKonstantin Belousov */ 235333d0c60SKonstantin Belousov ctx_switch_xsave[3] |= 0x10; 236333d0c60SKonstantin Belousov } 2378c6f8f3dSKonstantin Belousov } 2388c6f8f3dSKonstantin Belousov 2398c6f8f3dSKonstantin Belousov /* 2408c6f8f3dSKonstantin Belousov * Calculate the fpu save area size. 2418c6f8f3dSKonstantin Belousov */ 2428c6f8f3dSKonstantin Belousov static void 2438c6f8f3dSKonstantin Belousov fpuinit_bsp2(void) 2448c6f8f3dSKonstantin Belousov { 2458c6f8f3dSKonstantin Belousov u_int cp[4]; 2468c6f8f3dSKonstantin Belousov 2478c6f8f3dSKonstantin Belousov if (use_xsave) { 2488c6f8f3dSKonstantin Belousov cpuid_count(0xd, 0x0, cp); 2498c6f8f3dSKonstantin Belousov cpu_max_ext_state_size = cp[1]; 2508c6f8f3dSKonstantin Belousov 2518c6f8f3dSKonstantin Belousov /* 2528c6f8f3dSKonstantin Belousov * Reload the cpu_feature2, since we enabled OSXSAVE. 
2538c6f8f3dSKonstantin Belousov */ 2548c6f8f3dSKonstantin Belousov do_cpuid(1, cp); 2558c6f8f3dSKonstantin Belousov cpu_feature2 = cp[2]; 2568c6f8f3dSKonstantin Belousov } else 2578c6f8f3dSKonstantin Belousov cpu_max_ext_state_size = sizeof(struct savefpu); 2588c6f8f3dSKonstantin Belousov } 2598c6f8f3dSKonstantin Belousov 2608c6f8f3dSKonstantin Belousov /* 2618c6f8f3dSKonstantin Belousov * Initialize the floating point unit. 262da4113b3SPeter Wemm */ 263398dbb11SPeter Wemm void 2641c89210cSPeter Wemm fpuinit(void) 265da4113b3SPeter Wemm { 2660689bdccSJohn Baldwin register_t saveintr; 26796a7759eSPeter Wemm u_int mxcsr; 268398dbb11SPeter Wemm u_short control; 269da4113b3SPeter Wemm 2708c6f8f3dSKonstantin Belousov if (IS_BSP()) 2718c6f8f3dSKonstantin Belousov fpuinit_bsp1(); 2728c6f8f3dSKonstantin Belousov 2738c6f8f3dSKonstantin Belousov if (use_xsave) { 2748c6f8f3dSKonstantin Belousov load_cr4(rcr4() | CR4_XSAVE); 2757574a595SJohn Baldwin load_xcr(XCR0, xsave_mask); 2768c6f8f3dSKonstantin Belousov } 2778c6f8f3dSKonstantin Belousov 2788c6f8f3dSKonstantin Belousov /* 2798c6f8f3dSKonstantin Belousov * XCR0 shall be set up before CPU can report the save area size. 2808c6f8f3dSKonstantin Belousov */ 2818c6f8f3dSKonstantin Belousov if (IS_BSP()) 2828c6f8f3dSKonstantin Belousov fpuinit_bsp2(); 2838c6f8f3dSKonstantin Belousov 28499753495SKonstantin Belousov /* 28599753495SKonstantin Belousov * It is too early for critical_enter() to work on AP. 28699753495SKonstantin Belousov */ 2870689bdccSJohn Baldwin saveintr = intr_disable(); 2885b81b6b3SRodney W. Grimes stop_emulating(); 2895b81b6b3SRodney W. Grimes fninit(); 290398dbb11SPeter Wemm control = __INITIAL_FPUCW__; 29117275403SJung-uk Kim fldcw(control); 29296a7759eSPeter Wemm mxcsr = __INITIAL_MXCSR__; 29396a7759eSPeter Wemm ldmxcsr(mxcsr); 294a8346a98SJohn Baldwin start_emulating(); 2950689bdccSJohn Baldwin intr_restore(saveintr); 2965b81b6b3SRodney W. Grimes } 2975b81b6b3SRodney W. Grimes 2985b81b6b3SRodney W. 
Grimes /* 2998c6f8f3dSKonstantin Belousov * On the boot CPU we generate a clean state that is used to 3008c6f8f3dSKonstantin Belousov * initialize the floating point unit when it is first used by a 3018c6f8f3dSKonstantin Belousov * process. 3028c6f8f3dSKonstantin Belousov */ 3038c6f8f3dSKonstantin Belousov static void 3048c6f8f3dSKonstantin Belousov fpuinitstate(void *arg __unused) 3058c6f8f3dSKonstantin Belousov { 3068c6f8f3dSKonstantin Belousov register_t saveintr; 307333d0c60SKonstantin Belousov int cp[4], i, max_ext_n; 3088c6f8f3dSKonstantin Belousov 3098c6f8f3dSKonstantin Belousov fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, 3108c6f8f3dSKonstantin Belousov M_WAITOK | M_ZERO); 3118c6f8f3dSKonstantin Belousov saveintr = intr_disable(); 3128c6f8f3dSKonstantin Belousov stop_emulating(); 3138c6f8f3dSKonstantin Belousov 3148c6f8f3dSKonstantin Belousov fpusave(fpu_initialstate); 3158c6f8f3dSKonstantin Belousov if (fpu_initialstate->sv_env.en_mxcsr_mask) 3168c6f8f3dSKonstantin Belousov cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask; 3178c6f8f3dSKonstantin Belousov else 3188c6f8f3dSKonstantin Belousov cpu_mxcsr_mask = 0xFFBF; 3198c6f8f3dSKonstantin Belousov 3208c6f8f3dSKonstantin Belousov /* 321b57e6814SKonstantin Belousov * The fninit instruction does not modify XMM registers or x87 322b57e6814SKonstantin Belousov * registers (MM/ST). The fpusave call dumped the garbage 323b57e6814SKonstantin Belousov * contained in the registers after reset to the initial state 324b57e6814SKonstantin Belousov * saved. Clear XMM and x87 registers file image to make the 325b57e6814SKonstantin Belousov * startup program state and signal handler XMM/x87 register 326b57e6814SKonstantin Belousov * content predictable. 
3278c6f8f3dSKonstantin Belousov */ 328b57e6814SKonstantin Belousov bzero(fpu_initialstate->sv_fp, sizeof(fpu_initialstate->sv_fp)); 329b57e6814SKonstantin Belousov bzero(fpu_initialstate->sv_xmm, sizeof(fpu_initialstate->sv_xmm)); 3308c6f8f3dSKonstantin Belousov 331333d0c60SKonstantin Belousov /* 332333d0c60SKonstantin Belousov * Create a table describing the layout of the CPU Extended 333333d0c60SKonstantin Belousov * Save Area. 334333d0c60SKonstantin Belousov */ 33514f52559SKonstantin Belousov if (use_xsave) { 336333d0c60SKonstantin Belousov max_ext_n = flsl(xsave_mask); 337333d0c60SKonstantin Belousov xsave_area_desc = malloc(max_ext_n * sizeof(struct 338333d0c60SKonstantin Belousov xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); 339333d0c60SKonstantin Belousov /* x87 state */ 340333d0c60SKonstantin Belousov xsave_area_desc[0].offset = 0; 341333d0c60SKonstantin Belousov xsave_area_desc[0].size = 160; 342333d0c60SKonstantin Belousov /* XMM */ 343333d0c60SKonstantin Belousov xsave_area_desc[1].offset = 160; 344333d0c60SKonstantin Belousov xsave_area_desc[1].size = 288 - 160; 345333d0c60SKonstantin Belousov 346333d0c60SKonstantin Belousov for (i = 2; i < max_ext_n; i++) { 347333d0c60SKonstantin Belousov cpuid_count(0xd, i, cp); 348333d0c60SKonstantin Belousov xsave_area_desc[i].offset = cp[1]; 349333d0c60SKonstantin Belousov xsave_area_desc[i].size = cp[0]; 350333d0c60SKonstantin Belousov } 351333d0c60SKonstantin Belousov } 352333d0c60SKonstantin Belousov 3532741efecSPeter Grehan fpu_save_area_zone = uma_zcreate("FPU_save_area", 3542741efecSPeter Grehan cpu_max_ext_state_size, NULL, NULL, NULL, NULL, 3552741efecSPeter Grehan XSAVE_AREA_ALIGN - 1, 0); 3562741efecSPeter Grehan 3578c6f8f3dSKonstantin Belousov start_emulating(); 3588c6f8f3dSKonstantin Belousov intr_restore(saveintr); 3598c6f8f3dSKonstantin Belousov } 3608c6f8f3dSKonstantin Belousov SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, fpuinitstate, NULL); 3618c6f8f3dSKonstantin Belousov 
3628c6f8f3dSKonstantin Belousov /* 3635b81b6b3SRodney W. Grimes * Free coprocessor (if we have it). 3645b81b6b3SRodney W. Grimes */ 3655b81b6b3SRodney W. Grimes void 366bf2f09eeSPeter Wemm fpuexit(struct thread *td) 3675b81b6b3SRodney W. Grimes { 3685b81b6b3SRodney W. Grimes 36999753495SKonstantin Belousov critical_enter(); 3701c89210cSPeter Wemm if (curthread == PCPU_GET(fpcurthread)) { 3711c89210cSPeter Wemm stop_emulating(); 37283b22b05SKonstantin Belousov fpusave(curpcb->pcb_save); 3731c89210cSPeter Wemm start_emulating(); 3746dfc9e44SKonstantin Belousov PCPU_SET(fpcurthread, NULL); 3751c89210cSPeter Wemm } 37699753495SKonstantin Belousov critical_exit(); 3775b81b6b3SRodney W. Grimes } 3785b81b6b3SRodney W. Grimes 37930abe507SJonathan Mini int 380f132cd05SKonstantin Belousov fpuformat(void) 38130abe507SJonathan Mini { 38230abe507SJonathan Mini 38330abe507SJonathan Mini return (_MC_FPFMT_XMM); 38430abe507SJonathan Mini } 38530abe507SJonathan Mini 3865b81b6b3SRodney W. Grimes /* 387a7674320SMartin Cracauer * The following mechanism is used to ensure that the FPE_... value 388a7674320SMartin Cracauer * that is passed as a trapcode to the signal handler of the user 389a7674320SMartin Cracauer * process does not have more than one bit set. 390a7674320SMartin Cracauer * 391a7674320SMartin Cracauer * Multiple bits may be set if the user process modifies the control 392a7674320SMartin Cracauer * word while a status word bit is already set. While this is a sign 393a7674320SMartin Cracauer * of bad coding, we have no choise than to narrow them down to one 394a7674320SMartin Cracauer * bit, since we must not send a trapcode that is not exactly one of 395a7674320SMartin Cracauer * the FPE_ macros. 396a7674320SMartin Cracauer * 397a7674320SMartin Cracauer * The mechanism has a static table with 127 entries. 
Each combination 398a7674320SMartin Cracauer * of the 7 FPU status word exception bits directly translates to a 399a7674320SMartin Cracauer * position in this table, where a single FPE_... value is stored. 400a7674320SMartin Cracauer * This FPE_... value stored there is considered the "most important" 401a7674320SMartin Cracauer * of the exception bits and will be sent as the signal code. The 402a7674320SMartin Cracauer * precedence of the bits is based upon Intel Document "Numerical 403a7674320SMartin Cracauer * Applications", Chapter "Special Computational Situations". 404a7674320SMartin Cracauer * 405a7674320SMartin Cracauer * The macro to choose one of these values does these steps: 1) Throw 406a7674320SMartin Cracauer * away status word bits that cannot be masked. 2) Throw away the bits 407a7674320SMartin Cracauer * currently masked in the control word, assuming the user isn't 408a7674320SMartin Cracauer * interested in them anymore. 3) Reinsert status word bit 7 (stack 409a7674320SMartin Cracauer * fault) if it is set, which cannot be masked but must be presered. 410a7674320SMartin Cracauer * 4) Use the remaining bits to point into the trapcode table. 411a7674320SMartin Cracauer * 412a7674320SMartin Cracauer * The 6 maskable bits in order of their preference, as stated in the 413a7674320SMartin Cracauer * above referenced Intel manual: 414a7674320SMartin Cracauer * 1 Invalid operation (FP_X_INV) 415a7674320SMartin Cracauer * 1a Stack underflow 416a7674320SMartin Cracauer * 1b Stack overflow 417a7674320SMartin Cracauer * 1c Operand of unsupported format 418a7674320SMartin Cracauer * 1d SNaN operand. 
419a7674320SMartin Cracauer * 2 QNaN operand (not an exception, irrelavant here) 420a7674320SMartin Cracauer * 3 Any other invalid-operation not mentioned above or zero divide 421a7674320SMartin Cracauer * (FP_X_INV, FP_X_DZ) 422a7674320SMartin Cracauer * 4 Denormal operand (FP_X_DNML) 423a7674320SMartin Cracauer * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) 424784648c6SMartin Cracauer * 6 Inexact result (FP_X_IMP) 425784648c6SMartin Cracauer */ 426a7674320SMartin Cracauer static char fpetable[128] = { 427a7674320SMartin Cracauer 0, 428a7674320SMartin Cracauer FPE_FLTINV, /* 1 - INV */ 429a7674320SMartin Cracauer FPE_FLTUND, /* 2 - DNML */ 430a7674320SMartin Cracauer FPE_FLTINV, /* 3 - INV | DNML */ 431a7674320SMartin Cracauer FPE_FLTDIV, /* 4 - DZ */ 432a7674320SMartin Cracauer FPE_FLTINV, /* 5 - INV | DZ */ 433a7674320SMartin Cracauer FPE_FLTDIV, /* 6 - DNML | DZ */ 434a7674320SMartin Cracauer FPE_FLTINV, /* 7 - INV | DNML | DZ */ 435a7674320SMartin Cracauer FPE_FLTOVF, /* 8 - OFL */ 436a7674320SMartin Cracauer FPE_FLTINV, /* 9 - INV | OFL */ 437a7674320SMartin Cracauer FPE_FLTUND, /* A - DNML | OFL */ 438a7674320SMartin Cracauer FPE_FLTINV, /* B - INV | DNML | OFL */ 439a7674320SMartin Cracauer FPE_FLTDIV, /* C - DZ | OFL */ 440a7674320SMartin Cracauer FPE_FLTINV, /* D - INV | DZ | OFL */ 441a7674320SMartin Cracauer FPE_FLTDIV, /* E - DNML | DZ | OFL */ 442a7674320SMartin Cracauer FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ 443a7674320SMartin Cracauer FPE_FLTUND, /* 10 - UFL */ 444a7674320SMartin Cracauer FPE_FLTINV, /* 11 - INV | UFL */ 445a7674320SMartin Cracauer FPE_FLTUND, /* 12 - DNML | UFL */ 446a7674320SMartin Cracauer FPE_FLTINV, /* 13 - INV | DNML | UFL */ 447a7674320SMartin Cracauer FPE_FLTDIV, /* 14 - DZ | UFL */ 448a7674320SMartin Cracauer FPE_FLTINV, /* 15 - INV | DZ | UFL */ 449a7674320SMartin Cracauer FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ 450a7674320SMartin Cracauer FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ 451a7674320SMartin Cracauer 
FPE_FLTOVF, /* 18 - OFL | UFL */ 452a7674320SMartin Cracauer FPE_FLTINV, /* 19 - INV | OFL | UFL */ 453a7674320SMartin Cracauer FPE_FLTUND, /* 1A - DNML | OFL | UFL */ 454a7674320SMartin Cracauer FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ 455a7674320SMartin Cracauer FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ 456a7674320SMartin Cracauer FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ 457a7674320SMartin Cracauer FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ 458a7674320SMartin Cracauer FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ 459a7674320SMartin Cracauer FPE_FLTRES, /* 20 - IMP */ 460a7674320SMartin Cracauer FPE_FLTINV, /* 21 - INV | IMP */ 461a7674320SMartin Cracauer FPE_FLTUND, /* 22 - DNML | IMP */ 462a7674320SMartin Cracauer FPE_FLTINV, /* 23 - INV | DNML | IMP */ 463a7674320SMartin Cracauer FPE_FLTDIV, /* 24 - DZ | IMP */ 464a7674320SMartin Cracauer FPE_FLTINV, /* 25 - INV | DZ | IMP */ 465a7674320SMartin Cracauer FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ 466a7674320SMartin Cracauer FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ 467a7674320SMartin Cracauer FPE_FLTOVF, /* 28 - OFL | IMP */ 468a7674320SMartin Cracauer FPE_FLTINV, /* 29 - INV | OFL | IMP */ 469a7674320SMartin Cracauer FPE_FLTUND, /* 2A - DNML | OFL | IMP */ 470a7674320SMartin Cracauer FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ 471a7674320SMartin Cracauer FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ 472a7674320SMartin Cracauer FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ 473a7674320SMartin Cracauer FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ 474a7674320SMartin Cracauer FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ 475a7674320SMartin Cracauer FPE_FLTUND, /* 30 - UFL | IMP */ 476a7674320SMartin Cracauer FPE_FLTINV, /* 31 - INV | UFL | IMP */ 477a7674320SMartin Cracauer FPE_FLTUND, /* 32 - DNML | UFL | IMP */ 478a7674320SMartin Cracauer FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ 479a7674320SMartin Cracauer FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ 480a7674320SMartin Cracauer FPE_FLTINV, /* 35 - INV | DZ | 
UFL | IMP */ 481a7674320SMartin Cracauer FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ 482a7674320SMartin Cracauer FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ 483a7674320SMartin Cracauer FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ 484a7674320SMartin Cracauer FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ 485a7674320SMartin Cracauer FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ 486a7674320SMartin Cracauer FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ 487a7674320SMartin Cracauer FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ 488a7674320SMartin Cracauer FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ 489a7674320SMartin Cracauer FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ 490a7674320SMartin Cracauer FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ 491a7674320SMartin Cracauer FPE_FLTSUB, /* 40 - STK */ 492a7674320SMartin Cracauer FPE_FLTSUB, /* 41 - INV | STK */ 493a7674320SMartin Cracauer FPE_FLTUND, /* 42 - DNML | STK */ 494a7674320SMartin Cracauer FPE_FLTSUB, /* 43 - INV | DNML | STK */ 495a7674320SMartin Cracauer FPE_FLTDIV, /* 44 - DZ | STK */ 496a7674320SMartin Cracauer FPE_FLTSUB, /* 45 - INV | DZ | STK */ 497a7674320SMartin Cracauer FPE_FLTDIV, /* 46 - DNML | DZ | STK */ 498a7674320SMartin Cracauer FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ 499a7674320SMartin Cracauer FPE_FLTOVF, /* 48 - OFL | STK */ 500a7674320SMartin Cracauer FPE_FLTSUB, /* 49 - INV | OFL | STK */ 501a7674320SMartin Cracauer FPE_FLTUND, /* 4A - DNML | OFL | STK */ 502a7674320SMartin Cracauer FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ 503a7674320SMartin Cracauer FPE_FLTDIV, /* 4C - DZ | OFL | STK */ 504a7674320SMartin Cracauer FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ 505a7674320SMartin Cracauer FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ 506a7674320SMartin Cracauer FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ 507a7674320SMartin Cracauer FPE_FLTUND, /* 50 - UFL | STK */ 508a7674320SMartin Cracauer FPE_FLTSUB, /* 51 - INV | UFL | STK */ 509a7674320SMartin 
Cracauer FPE_FLTUND, /* 52 - DNML | UFL | STK */ 510a7674320SMartin Cracauer FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ 511a7674320SMartin Cracauer FPE_FLTDIV, /* 54 - DZ | UFL | STK */ 512a7674320SMartin Cracauer FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ 513a7674320SMartin Cracauer FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ 514a7674320SMartin Cracauer FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ 515a7674320SMartin Cracauer FPE_FLTOVF, /* 58 - OFL | UFL | STK */ 516a7674320SMartin Cracauer FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ 517a7674320SMartin Cracauer FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ 518a7674320SMartin Cracauer FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ 519a7674320SMartin Cracauer FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ 520a7674320SMartin Cracauer FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ 521a7674320SMartin Cracauer FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ 522a7674320SMartin Cracauer FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ 523a7674320SMartin Cracauer FPE_FLTRES, /* 60 - IMP | STK */ 524a7674320SMartin Cracauer FPE_FLTSUB, /* 61 - INV | IMP | STK */ 525a7674320SMartin Cracauer FPE_FLTUND, /* 62 - DNML | IMP | STK */ 526a7674320SMartin Cracauer FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ 527a7674320SMartin Cracauer FPE_FLTDIV, /* 64 - DZ | IMP | STK */ 528a7674320SMartin Cracauer FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ 529a7674320SMartin Cracauer FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ 530a7674320SMartin Cracauer FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ 531a7674320SMartin Cracauer FPE_FLTOVF, /* 68 - OFL | IMP | STK */ 532a7674320SMartin Cracauer FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ 533a7674320SMartin Cracauer FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ 534a7674320SMartin Cracauer FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ 535a7674320SMartin Cracauer FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ 536a7674320SMartin Cracauer FPE_FLTSUB, /* 6D - INV 
| DZ | OFL | IMP | STK */ 537a7674320SMartin Cracauer FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ 538a7674320SMartin Cracauer FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ 539a7674320SMartin Cracauer FPE_FLTUND, /* 70 - UFL | IMP | STK */ 540a7674320SMartin Cracauer FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ 541a7674320SMartin Cracauer FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ 542a7674320SMartin Cracauer FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ 543a7674320SMartin Cracauer FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ 544a7674320SMartin Cracauer FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ 545a7674320SMartin Cracauer FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ 546a7674320SMartin Cracauer FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ 547a7674320SMartin Cracauer FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ 548a7674320SMartin Cracauer FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ 549a7674320SMartin Cracauer FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ 550a7674320SMartin Cracauer FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ 551a7674320SMartin Cracauer FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ 552a7674320SMartin Cracauer FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ 553a7674320SMartin Cracauer FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ 554a7674320SMartin Cracauer FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ 555a7674320SMartin Cracauer }; 556a7674320SMartin Cracauer 557a7674320SMartin Cracauer /* 558dfa8a512SKonstantin Belousov * Read the FP status and control words, then generate si_code value 559dfa8a512SKonstantin Belousov * for SIGFPE. The error code chosen will be one of the 560dfa8a512SKonstantin Belousov * FPE_... macros. It will be sent as the second argument to old 561dfa8a512SKonstantin Belousov * BSD-style signal handlers and as "siginfo_t->si_code" (second 562dfa8a512SKonstantin Belousov * argument) to SA_SIGINFO signal handlers. 
 *
 * Some time ago, we cleared the x87 exceptions with FNCLEX there.
 * Clearing exceptions was necessary mainly to avoid IRQ13 bugs.  The
 * usermode code which understands the FPU hardware enough to enable
 * the exceptions, can also handle clearing the exception state in the
 * handler.  The only consequence of not clearing the exception is the
 * rethrow of the SIGFPE on return from the signal handler and
 * reexecution of the corresponding instruction.
 *
 * For XMM traps, the exceptions were never cleared.
 *
 * Returns the fpetable[] entry selected by the pending x87 exception
 * flags that are not masked in the control word.
 */
int
fputrap_x87(void)
{
	struct savefpu *pcb_save;
	u_short control, status;

	critical_enter();

	/*
	 * Interrupt handling (for another interrupt) may have pushed the
	 * state to memory.  Fetch the relevant parts of the state from
	 * wherever they are.
	 */
	if (PCPU_GET(fpcurthread) != curthread) {
		/* Not the FPU owner on this CPU: use the copy in the PCB. */
		pcb_save = curpcb->pcb_save;
		control = pcb_save->sv_env.en_cw;
		status = pcb_save->sv_env.en_sw;
	} else {
		/* We own the FPU: read the live control and status words. */
		fnstcw(&control);
		fnstsw(&status);
	}

	critical_exit();
	/*
	 * Index fpetable[] with the status-word flags whose bit in the
	 * low six bits of the control word is clear, plus bit 0x40 of the
	 * status word (labelled STK in the table comments), which is kept
	 * regardless of the control word.
	 */
	return (fpetable[status & ((~control & 0x3f) | 0x40)]);
}

/*
 * SSE counterpart of fputrap_x87(): returns the fpetable[] entry for
 * the pending SIMD floating-point exception.  MXCSR is read from the
 * CPU when curthread is fpcurthread on this CPU, and from the copy
 * saved in the PCB otherwise.
 */
int
fputrap_sse(void)
{
	u_int mxcsr;

	critical_enter();
	if (PCPU_GET(fpcurthread) != curthread)
		mxcsr = curpcb->pcb_save->sv_env.en_mxcsr;
	else
		stmxcsr(&mxcsr);
	critical_exit();
	/*
	 * Bits 0-5 of MXCSR are ANDed with the complement of bits 7-12
	 * and the result is used as the table index.  Per the Intel SDM
	 * these are the exception flags and their mask bits respectively,
	 * so only unmasked pending exceptions take part in the lookup.
	 */
	return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
}

/*
 * Device Not Available (DNA, #NM) exception handler.
 *
 * It would be better to switch FP context here (if curthread !=
 * fpcurthread) and not necessarily for every context switch, but it
 * is too hard to access foreign pcb's.
 *
 * Makes curthread the FPU owner on this CPU and loads its FPU state:
 * either the state saved in curpcb->pcb_save, or a fresh copy of
 * fpu_initialstate when PCB_FPUINITDONE is not set.
 */
void
fpudna(void)
{

	/*
	 * This handler is entered with interrupts enabled, so context
	 * switches may occur before critical_enter() is executed.  If
	 * a context switch occurs, then when we regain control, our
	 * state will have been completely restored.  The CPU may
	 * change underneath us, but the only part of our context that
	 * lives in the CPU is CR0.TS and that will be "restored" by
	 * setting it on the new CPU.
	 */
	critical_enter();

	KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0,
	    ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)"));
	if (PCPU_GET(fpcurthread) == curthread) {
		/*
		 * The trap fired although this thread is already recorded
		 * as the FPU owner.  Log it, re-enable FPU access and
		 * return without reloading any state.
		 */
		printf("fpudna: fpcurthread == curthread\n");
		stop_emulating();
		critical_exit();
		return;
	}
	if (PCPU_GET(fpcurthread) != NULL) {
		/* Another thread is still recorded as owner: fatal. */
		panic("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n",
		    PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid,
		    curthread, curthread->td_tid);
	}
	/* NOTE(review): presumably clears CR0.TS; helper defined elsewhere. */
	stop_emulating();
	/*
	 * Record new context early in case frstor causes a trap.
	 */
	PCPU_SET(fpcurthread, curthread);

	fpu_clean_state();

	if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) {
		/*
		 * This is the first time this thread has used the FPU or
		 * the PCB doesn't contain a clean FPU state.  Explicitly
		 * load an initial state.
		 *
		 * We prefer to restore the state from the actual save
		 * area in PCB instead of directly loading from
		 * fpu_initialstate, to ignite the XSAVEOPT
		 * tracking engine.
		 */
		bcopy(fpu_initialstate, curpcb->pcb_save,
		    cpu_max_ext_state_size);
		fpurestore(curpcb->pcb_save);
		/* Apply a per-thread control word if it is not the default. */
		if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
			fldcw(curpcb->pcb_initial_fpucw);
		if (PCB_USER_FPU(curpcb))
			set_pcb_flags(curpcb,
			    PCB_FPUINITDONE | PCB_USERFPUINITDONE);
		else
			set_pcb_flags(curpcb, PCB_FPUINITDONE);
	} else
		fpurestore(curpcb->pcb_save);
	critical_exit();
}

/*
 * Give up FPU ownership on this CPU without saving the state: clears
 * the per-CPU fpcurthread, clears PCB_FPUINITDONE in the owner's PCB
 * and calls start_emulating().  The caller must be the current owner
 * (asserted) and, per CRITICAL_ASSERT(), is expected to be inside a
 * critical section.
 */
void
fpudrop(void)
{
	struct thread *td;

	td = PCPU_GET(fpcurthread);
	KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread"));
	CRITICAL_ASSERT(td);
	PCPU_SET(fpcurthread, NULL);
	clear_pcb_flags(td->td_pcb, PCB_FPUINITDONE);
	start_emulating();
}

/*
 * Get the user state of the FPU into pcb->pcb_user_save without
 * dropping ownership (if possible).
 * It returns the FPU ownership status: _MC_FPOWNED_FPU when the state
 * was just saved from the hardware because td owns the FPU on this
 * CPU, and _MC_FPOWNED_PCB otherwise.
 */
int
fpugetregs(struct thread *td)
{
	struct pcb *pcb;
	uint64_t *xstate_bv, bit;
	char *sa;
	int max_ext_n, i, owned;

	pcb = td->td_pcb;
	if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
		/*
		 * The user FPU state was never initialized.  Seed the
		 * user save area with fpu_initialstate and the thread's
		 * initial control word, then mark it initialized.
		 */
		bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb),
		    cpu_max_ext_state_size);
		get_pcb_user_save_pcb(pcb)->sv_env.en_cw =
		    pcb->pcb_initial_fpucw;
		fpuuserinited(td);
		return (_MC_FPOWNED_PCB);
	}
	critical_enter();
	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
		/* The live registers hold the user state: save them. */
		fpusave(get_pcb_user_save_pcb(pcb));
		owned = _MC_FPOWNED_FPU;
	} else {
		owned = _MC_FPOWNED_PCB;
	}
	critical_exit();
	if (use_xsave) {
		/*
		 * Handle partially saved state.
		 *
		 * For every component enabled in xsave_mask whose bit is
		 * clear in the saved xstate_bv, copy that component's
		 * image from fpu_initialstate into the save area and set
		 * its bit, so the area describes all enabled components.
		 */
		sa = (char *)get_pcb_user_save_pcb(pcb);
		/* xstate_bv sits in the header right after struct savefpu. */
		xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) +
		    offsetof(struct xstate_hdr, xstate_bv));
		max_ext_n = flsl(xsave_mask);
		for (i = 0; i < max_ext_n; i++) {
			bit = 1ULL << i;
			if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0)
				continue;
			bcopy((char *)fpu_initialstate +
			    xsave_area_desc[i].offset,
			    sa + xsave_area_desc[i].offset,
			    xsave_area_desc[i].size);
			*xstate_bv |= bit;
		}
	}
	return (owned);
}

/*
 * Mark td's FPU state as initialized.  PCB_FPUINITDONE is always set;
 * PCB_USERFPUINITDONE is set as well when PCB_USER_FPU(pcb) is true.
 */
void
fpuuserinited(struct thread *td)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	if (PCB_USER_FPU(pcb))
		set_pcb_flags(pcb,
		    PCB_FPUINITDONE | PCB_USERFPUINITDONE);
	else
		set_pcb_flags(pcb, PCB_FPUINITDONE);
}

/*
 * Install caller-supplied extended (XSAVE) state into td's user save
 * area.
 *
 * xfpustate points to a struct xstate_hdr followed by component data,
 * xfpustate_size bytes in total.  A NULL xfpustate means there is
 * nothing to install and 0 is returned.
 *
 * Returns 0 on success, EOPNOTSUPP when XSAVE is not in use, and
 * EINVAL when the size is smaller than the header, larger than the
 * room after struct savefpu, or when xstate_bv names a component that
 * is not in xsave_mask.
 */
int
fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size)
{
	struct xstate_hdr *hdr, *ehdr;
	size_t len, max_len;
	uint64_t bv;

	/* XXXKIB should we clear all extended state in xstate_bv instead ? */
	if (xfpustate == NULL)
		return (0);
	if (!use_xsave)
		return (EOPNOTSUPP);

	len = xfpustate_size;
	if (len < sizeof(struct xstate_hdr))
		return (EINVAL);
	/* The extended area is whatever follows the legacy savefpu part. */
	max_len = cpu_max_ext_state_size - sizeof(struct savefpu);
	if (len > max_len)
		return (EINVAL);

	ehdr = (struct xstate_hdr *)xfpustate;
	bv = ehdr->xstate_bv;

	/*
	 * Avoid #gp.
	 */
	if (bv & ~xsave_mask)
		return (EINVAL);

	/* The XSAVE header immediately follows the user struct savefpu. */
	hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1);

	/*
	 * Only xstate_bv is taken from the supplied header; the other
	 * header fields in the PCB are left as they were.  The component
	 * data after the header is copied verbatim.
	 */
	hdr->xstate_bv = bv;
	bcopy(xfpustate + sizeof(struct xstate_hdr),
	    (char *)(hdr + 1), len - sizeof(struct xstate_hdr));

	return (0);
}

/*
 * Set the state of the FPU.
 *
 * Copies *addr (and the optional extended state in xfpustate, see
 * fpusetxstate()) into td's user save area.  When td owns the FPU on
 * this CPU with user state loaded, the new state is also loaded into
 * the hardware.
 *
 * addr->sv_env.en_mxcsr is masked with cpu_mxcsr_mask in place, so the
 * caller's buffer is modified.
 *
 * Returns 0 on success or the error returned by fpusetxstate(); on
 * error the legacy state in the PCB has not been overwritten.
 */
int
fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate,
    size_t xfpustate_size)
{
	struct pcb *pcb;
	int error;

	addr->sv_env.en_mxcsr &= cpu_mxcsr_mask;
	pcb = td->td_pcb;
	critical_enter();
	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
		/*
		 * The user state is live in the FPU.  Update the PCB copy
		 * and reload the hardware without leaving the critical
		 * section.
		 */
		error = fpusetxstate(td, xfpustate, xfpustate_size);
		if (error != 0) {
			critical_exit();
			return (error);
		}
		bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
		fpurestore(get_pcb_user_save_td(td));
		critical_exit();
		set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE);
	} else {
		/*
		 * The user state lives only in the PCB: leave the critical
		 * section first, then update the memory copy.
		 */
		critical_exit();
		error = fpusetxstate(td, xfpustate, xfpustate_size);
		if (error != 0)
			return (error);
		bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
		fpuuserinited(td);
	}
	return (0);
}

/*
 * On AuthenticAMD processors, the fxrstor instruction does not restore
 * the x87's stored last instruction pointer, last data pointer, and last
 * opcode values, except in the rare case in which the exception summary
 * (ES) bit in the x87 status word is set to 1.
 *
 * In order to avoid leaking this information across processes, we clean
 * these values by performing a dummy load before executing fxrstor().
 *
 * Called from fpudna() right before the thread's state is restored.
 */
static void
fpu_clean_state(void)
{
	/* Operand for the dummy load; its value is irrelevant. */
	static float dummy_variable = 0.0;
	u_short status;

	/*
	 * Clear the ES bit in the x87 status word if it is currently
	 * set, in order to avoid causing a fault in the upcoming load.
	 */
	fnstsw(&status);
	if (status & 0x80)	/* 0x80 is the ES bit tested here */
		fnclex();

	/*
	 * Load the dummy variable into the x87 stack.  This mangles
	 * the x87 stack, but we don't care since we're about to call
	 * fxrstor() anyway.
	 *
	 * NOTE(review): the ffree of %st(7) presumably guarantees a free
	 * slot so that the flds push cannot overflow the register stack.
	 */
	__asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable));
}

/*
 * This really sucks.  We want the acpi version only, but it requires
 * the isa_if.h file in order to get the definitions.
 */
#include "opt_isa.h"
#ifdef DEV_ISA
#include <isa/isavar.h>
/*
 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
87354f1d0ceSGarrett Wollman */ 874398dbb11SPeter Wemm static struct isa_pnp_id fpupnp_ids[] = { 87554f1d0ceSGarrett Wollman { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ 87654f1d0ceSGarrett Wollman { 0 } 87754f1d0ceSGarrett Wollman }; 87854f1d0ceSGarrett Wollman 87954f1d0ceSGarrett Wollman static int 880398dbb11SPeter Wemm fpupnp_probe(device_t dev) 88154f1d0ceSGarrett Wollman { 882bb9c06c1SMike Smith int result; 883bf2f09eeSPeter Wemm 884398dbb11SPeter Wemm result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids); 885bf2f09eeSPeter Wemm if (result <= 0) 886bb9c06c1SMike Smith device_quiet(dev); 887bb9c06c1SMike Smith return (result); 88854f1d0ceSGarrett Wollman } 88954f1d0ceSGarrett Wollman 89054f1d0ceSGarrett Wollman static int 891398dbb11SPeter Wemm fpupnp_attach(device_t dev) 89254f1d0ceSGarrett Wollman { 893bf2f09eeSPeter Wemm 89454f1d0ceSGarrett Wollman return (0); 89554f1d0ceSGarrett Wollman } 89654f1d0ceSGarrett Wollman 897398dbb11SPeter Wemm static device_method_t fpupnp_methods[] = { 89854f1d0ceSGarrett Wollman /* Device interface */ 899398dbb11SPeter Wemm DEVMETHOD(device_probe, fpupnp_probe), 900398dbb11SPeter Wemm DEVMETHOD(device_attach, fpupnp_attach), 90154f1d0ceSGarrett Wollman DEVMETHOD(device_detach, bus_generic_detach), 90254f1d0ceSGarrett Wollman DEVMETHOD(device_shutdown, bus_generic_shutdown), 90354f1d0ceSGarrett Wollman DEVMETHOD(device_suspend, bus_generic_suspend), 90454f1d0ceSGarrett Wollman DEVMETHOD(device_resume, bus_generic_resume), 90554f1d0ceSGarrett Wollman 90654f1d0ceSGarrett Wollman { 0, 0 } 90754f1d0ceSGarrett Wollman }; 90854f1d0ceSGarrett Wollman 909398dbb11SPeter Wemm static driver_t fpupnp_driver = { 910398dbb11SPeter Wemm "fpupnp", 911398dbb11SPeter Wemm fpupnp_methods, 91254f1d0ceSGarrett Wollman 1, /* no softc */ 91354f1d0ceSGarrett Wollman }; 91454f1d0ceSGarrett Wollman 915398dbb11SPeter Wemm static devclass_t fpupnp_devclass; 91654f1d0ceSGarrett Wollman 917398dbb11SPeter Wemm 
DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0);
#endif	/* DEV_ISA */

/* malloc(9) type used for struct fpu_kern_ctx allocations. */
static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for FPU state");

/* Values for fpu_kern_ctx.flags. */
#define	FPU_KERN_CTX_FPUINITDONE 0x01	/* PCB_FPUINITDONE was set on enter */
#define	FPU_KERN_CTX_DUMMY	 0x02	/* avoided save for the kern thread */
#define	FPU_KERN_CTX_INUSE	 0x04	/* between fpu_kern_enter and _leave */

/*
 * Per-use context for kernel-mode FPU usage, handed to
 * fpu_kern_enter() and fpu_kern_leave().
 */
struct fpu_kern_ctx {
	struct savefpu *prev;	/* pcb_save to put back on leave */
	uint32_t flags;		/* FPU_KERN_CTX_* */
	char hwstate1[];	/* raw storage for the aligned save area */
};

/*
 * Allocate a zeroed kernel FPU context.  The allocation carries
 * XSAVE_AREA_ALIGN extra bytes so that fpu_kern_ctx_savefpu() can round
 * the save area start up to that alignment.  With FPU_KERN_NOWAIT in
 * flags the allocation uses M_NOWAIT, otherwise M_WAITOK.
 */
struct fpu_kern_ctx *
fpu_kern_alloc_ctx(u_int flags)
{
	struct fpu_kern_ctx *res;
	size_t sz;

	sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN +
	    cpu_max_ext_state_size;
	res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
	    M_NOWAIT : M_WAITOK) | M_ZERO);
	return (res);
}

/*
 * Free a context obtained from fpu_kern_alloc_ctx().  The context must
 * not be in use (asserted).
 */
void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{

	KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx"));
	/* XXXKIB clear the memory ? */
	free(ctx, M_FPUKERN_CTX);
}

/*
 * Return the save area inside ctx: the address of hwstate1 rounded up
 * to XSAVE_AREA_ALIGN.
 */
static struct savefpu *
fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
{
	vm_offset_t p;

	p = (vm_offset_t)&ctx->hwstate1;
	p = roundup2(p, XSAVE_AREA_ALIGN);
	return ((struct savefpu *)p);
}

/*
 * Begin a section of kernel code that uses the FPU.  Always returns 0.
 *
 * With FPU_KERN_NOCTX no context is needed (ctx may be NULL).  The
 * current owner's state is saved if curthread owns the FPU, the
 * hardware is loaded with fpu_initialstate, and the function returns
 * with the critical section still entered; the matching
 * fpu_kern_leave() exits it.
 *
 * With FPU_KERN_KTHR on a thread for which is_fpu_kern_thread() is
 * true, ctx is only marked DUMMY | INUSE and nothing else changes.
 *
 * Otherwise pcb_save is redirected to the save area inside ctx, and
 * the previous pcb_save and PCB_FPUINITDONE state are remembered in
 * ctx for fpu_kern_leave().
 */
int
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
	    ("ctx is required when !FPU_KERN_NOCTX"));
	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
	    ("using inuse ctx"));
	KASSERT((pcb->pcb_flags & PCB_FPUNOSAVE) == 0,
	    ("recursive fpu_kern_enter while in PCB_FPUNOSAVE state"));

	if ((flags & FPU_KERN_NOCTX) != 0) {
		/* Intentionally not exited here; see fpu_kern_leave(). */
		critical_enter();
		stop_emulating();
		if (curthread == PCPU_GET(fpcurthread)) {
			/* Preserve the live state before clobbering it. */
			fpusave(curpcb->pcb_save);
			PCPU_SET(fpcurthread, NULL);
		} else {
			KASSERT(PCPU_GET(fpcurthread) == NULL,
			    ("invalid fpcurthread"));
		}

		/*
		 * This breaks XSAVEOPT tracker, but
		 * PCB_FPUNOSAVE state is supposed to never need to
		 * save FPU context at all.
		 */
		fpurestore(fpu_initialstate);
		set_pcb_flags(pcb, PCB_KERNFPU | PCB_FPUNOSAVE |
		    PCB_FPUINITDONE);
		return (0);
	}
	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
		/* No state switch is done; leave only clears INUSE. */
		ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
		return (0);
	}
	KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save ==
	    get_pcb_user_save_pcb(pcb), ("mangled pcb_save"));
	ctx->flags = FPU_KERN_CTX_INUSE;
	/* Remember whether the outgoing save area held initialized state. */
	if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0)
		ctx->flags |= FPU_KERN_CTX_FPUINITDONE;
	fpuexit(td);
	ctx->prev = pcb->pcb_save;
	pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
	set_pcb_flags(pcb, PCB_KERNFPU);
	clear_pcb_flags(pcb, PCB_FPUINITDONE);
	return (0);
}

/*
 * End a section started by fpu_kern_enter().  Always returns 0.
 *
 * In PCB_FPUNOSAVE state (entered with FPU_KERN_NOCTX) ctx must be
 * NULL; the flags are cleared, start_emulating() is called and the
 * critical section left open by fpu_kern_enter() is exited.
 *
 * Otherwise ctx is marked unused.  A dummy context on an FPU kernel
 * thread returns right away.  For a regular context, any live FPU
 * state is dropped and pcb_save is restored from ctx->prev.
 *
 * Finally PCB_FPUINITDONE and PCB_KERNFPU are brought back in line with
 * the save area that is now current.
 *
 * NOTE(review): the final else branch reads ctx->flags; on the
 * PCB_FPUNOSAVE path ctx is asserted to be NULL, so that path relies on
 * pcb_save being the user save area - confirm.
 */
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	pcb = td->td_pcb;

	if ((pcb->pcb_flags & PCB_FPUNOSAVE) != 0) {
		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
		KASSERT(PCPU_GET(fpcurthread) == NULL,
		    ("non-NULL fpcurthread for PCB_FPUNOSAVE"));
		CRITICAL_ASSERT(td);

		clear_pcb_flags(pcb, PCB_FPUNOSAVE | PCB_FPUINITDONE);
		start_emulating();
		/* Pairs with the critical_enter() in fpu_kern_enter(). */
		critical_exit();
	} else {
		KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
		    ("leaving not inuse ctx"));
		ctx->flags &= ~FPU_KERN_CTX_INUSE;

		if (is_fpu_kern_thread(0) &&
		    (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
			return (0);
		KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0,
		    ("dummy ctx"));
		critical_enter();
		/* The kernel-mode state is not kept: drop, do not save. */
		if (curthread == PCPU_GET(fpcurthread))
			fpudrop();
		critical_exit();
		pcb->pcb_save = ctx->prev;
	}

	if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) {
		/* Back on the user save area: kernel FPU use has ended. */
		if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) {
			set_pcb_flags(pcb, PCB_FPUINITDONE);
			clear_pcb_flags(pcb, PCB_KERNFPU);
		} else
			clear_pcb_flags(pcb, PCB_FPUINITDONE | PCB_KERNFPU);
	} else {
		/* Still nested: restore what the outer context recorded. */
		if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0)
			set_pcb_flags(pcb, PCB_FPUINITDONE);
		else
			clear_pcb_flags(pcb, PCB_FPUINITDONE);
		KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave"));
	}
	return (0);
}

/*
 * Mark the current kernel thread as an FPU-using kernel thread by
 * setting PCB_KERNFPU in its PCB.  Only valid for kthreads that are
 * still on their user save area and have not called this before (all
 * asserted).  The flags argument is not used.  Always returns 0.
 */
int
fpu_kern_thread(u_int flags)
{

	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
	    ("Only kthread may use fpu_kern_thread"));
	KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb),
	    ("mangled pcb_save"));
	KASSERT(PCB_USER_FPU(curpcb), ("recursive call"));

	set_pcb_flags(curpcb, PCB_KERNFPU);
	return (0);
}

/*
 * Return non-zero when the current thread is a kernel thread
 * (TDP_KTHREAD) with PCB_KERNFPU set, zero otherwise.  The flags
 * argument is not used.
 */
int
is_fpu_kern_thread(u_int flags)
{

	if ((curthread->td_pflags & TDP_KTHREAD) == 0)
		return (0);
	return ((curpcb->pcb_flags & PCB_KERNFPU) != 0);
}

/*
 * FPU save area alloc/free/init utility routines
 */

/*
 * Allocate a save area from fpu_save_area_zone.
 *
 * NOTE(review): the flags argument is 0, i.e. neither M_WAITOK nor
 * M_NOWAIT is passed - confirm this is intended.
 */
struct savefpu *
fpu_save_area_alloc(void)
{

	return (uma_zalloc(fpu_save_area_zone, 0));
}

/* Return a save area to fpu_save_area_zone. */
void
fpu_save_area_free(struct savefpu *fsa)
{

	uma_zfree(fpu_save_area_zone, fsa);
}

/* Overwrite a save area with the initial FPU state image. */
void
fpu_save_area_reset(struct savefpu *fsa)
{

	bcopy(fpu_initialstate, fsa, cpu_max_ext_state_size);
}