15b81b6b3SRodney W. Grimes /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 45b81b6b3SRodney W. Grimes * Copyright (c) 1990 William Jolitz. 55b81b6b3SRodney W. Grimes * Copyright (c) 1991 The Regents of the University of California. 65b81b6b3SRodney W. Grimes * All rights reserved. 75b81b6b3SRodney W. Grimes * 85b81b6b3SRodney W. Grimes * Redistribution and use in source and binary forms, with or without 95b81b6b3SRodney W. Grimes * modification, are permitted provided that the following conditions 105b81b6b3SRodney W. Grimes * are met: 115b81b6b3SRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 125b81b6b3SRodney W. Grimes * notice, this list of conditions and the following disclaimer. 135b81b6b3SRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 145b81b6b3SRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 155b81b6b3SRodney W. Grimes * documentation and/or other materials provided with the distribution. 16fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 175b81b6b3SRodney W. Grimes * may be used to endorse or promote products derived from this software 185b81b6b3SRodney W. Grimes * without specific prior written permission. 195b81b6b3SRodney W. Grimes * 205b81b6b3SRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 215b81b6b3SRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 225b81b6b3SRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 235b81b6b3SRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 245b81b6b3SRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 255b81b6b3SRodney W. 
Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 265b81b6b3SRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 275b81b6b3SRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 285b81b6b3SRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 295b81b6b3SRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 305b81b6b3SRodney W. Grimes * SUCH DAMAGE. 315b81b6b3SRodney W. Grimes * 3221616ec3SPeter Wemm * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 335b81b6b3SRodney W. Grimes */ 345b81b6b3SRodney W. Grimes 3556ae44c5SDavid E. O'Brien #include <sys/cdefs.h> 3656ae44c5SDavid E. O'Brien __FBSDID("$FreeBSD$"); 3756ae44c5SDavid E. O'Brien 38f540b106SGarrett Wollman #include <sys/param.h> 39f540b106SGarrett Wollman #include <sys/systm.h> 406182fdbdSPeter Wemm #include <sys/bus.h> 41c74a3041SConrad Meyer #include <sys/domainset.h> 423a34a5c3SPoul-Henning Kamp #include <sys/kernel.h> 43fb919e4dSMark Murray #include <sys/lock.h> 44cd59d49dSBruce Evans #include <sys/malloc.h> 456182fdbdSPeter Wemm #include <sys/module.h> 46c1ef8aacSJake Burkholder #include <sys/mutex.h> 47fb919e4dSMark Murray #include <sys/mutex.h> 48fb919e4dSMark Murray #include <sys/proc.h> 49fb919e4dSMark Murray #include <sys/sysctl.h> 50df013409SKonstantin Belousov #include <sys/sysent.h> 516182fdbdSPeter Wemm #include <machine/bus.h> 526182fdbdSPeter Wemm #include <sys/rman.h> 53663f1485SBruce Evans #include <sys/signalvar.h> 542741efecSPeter Grehan #include <vm/uma.h> 552f86936aSGarrett Wollman 567f47cf2fSBruce Evans #include <machine/cputypes.h> 577f47cf2fSBruce Evans #include <machine/frame.h> 580d2a2989SPeter Wemm #include <machine/intr_machdep.h> 59c673fe98SBruce Evans #include <machine/md_var.h> 605400ed3bSPeter Wemm #include <machine/pcb.h> 617f47cf2fSBruce Evans #include <machine/psl.h> 626182fdbdSPeter Wemm #include 
<machine/resource.h> 63f540b106SGarrett Wollman #include <machine/specialreg.h> 647f47cf2fSBruce Evans #include <machine/segments.h> 6530abe507SJonathan Mini #include <machine/ucontext.h> 668b4fc8b1SKonstantin Belousov #include <x86/ifunc.h> 672f86936aSGarrett Wollman 685b81b6b3SRodney W. Grimes /* 69bf2f09eeSPeter Wemm * Floating point support. 705b81b6b3SRodney W. Grimes */ 715b81b6b3SRodney W. Grimes 72a5f50ef9SJoerg Wunsch #if defined(__GNUCLIKE_ASM) && !defined(lint) 735b81b6b3SRodney W. Grimes 7417275403SJung-uk Kim #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) 7530402401SJung-uk Kim #define fnclex() __asm __volatile("fnclex") 7630402401SJung-uk Kim #define fninit() __asm __volatile("fninit") 771d37f051SBruce Evans #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) 782e50fa36SJung-uk Kim #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) 7930402401SJung-uk Kim #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) 809d146ac5SPeter Wemm #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) 8107c86dcfSJung-uk Kim #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) 82e54ae825SMark Johnston #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : "=m" (*(addr))) 835b81b6b3SRodney W. 
Grimes 8494818d19SKonstantin Belousov static __inline void 85df013409SKonstantin Belousov xrstor32(char *addr, uint64_t mask) 8694818d19SKonstantin Belousov { 8794818d19SKonstantin Belousov uint32_t low, hi; 8894818d19SKonstantin Belousov 8994818d19SKonstantin Belousov low = mask; 9094818d19SKonstantin Belousov hi = mask >> 32; 917574a595SJohn Baldwin __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); 9294818d19SKonstantin Belousov } 9394818d19SKonstantin Belousov 9494818d19SKonstantin Belousov static __inline void 95df013409SKonstantin Belousov xrstor64(char *addr, uint64_t mask) 96df013409SKonstantin Belousov { 97df013409SKonstantin Belousov uint32_t low, hi; 98df013409SKonstantin Belousov 99df013409SKonstantin Belousov low = mask; 100df013409SKonstantin Belousov hi = mask >> 32; 101df013409SKonstantin Belousov __asm __volatile("xrstor64 %0" : : "m" (*addr), "a" (low), "d" (hi)); 102df013409SKonstantin Belousov } 103df013409SKonstantin Belousov 104df013409SKonstantin Belousov static __inline void 105df013409SKonstantin Belousov xsave32(char *addr, uint64_t mask) 10694818d19SKonstantin Belousov { 10794818d19SKonstantin Belousov uint32_t low, hi; 10894818d19SKonstantin Belousov 10994818d19SKonstantin Belousov low = mask; 11094818d19SKonstantin Belousov hi = mask >> 32; 1117574a595SJohn Baldwin __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : 1127574a595SJohn Baldwin "memory"); 11394818d19SKonstantin Belousov } 11494818d19SKonstantin Belousov 1158207def1SConrad Meyer static __inline void 116df013409SKonstantin Belousov xsave64(char *addr, uint64_t mask) 117df013409SKonstantin Belousov { 118df013409SKonstantin Belousov uint32_t low, hi; 119df013409SKonstantin Belousov 120df013409SKonstantin Belousov low = mask; 121df013409SKonstantin Belousov hi = mask >> 32; 122df013409SKonstantin Belousov __asm __volatile("xsave64 %0" : "=m" (*addr) : "a" (low), "d" (hi) : 123df013409SKonstantin Belousov "memory"); 124df013409SKonstantin Belousov 
} 125df013409SKonstantin Belousov 126df013409SKonstantin Belousov static __inline void 127df013409SKonstantin Belousov xsaveopt32(char *addr, uint64_t mask) 1288207def1SConrad Meyer { 1298207def1SConrad Meyer uint32_t low, hi; 1308207def1SConrad Meyer 1318207def1SConrad Meyer low = mask; 1328207def1SConrad Meyer hi = mask >> 32; 1338207def1SConrad Meyer __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) : 1348207def1SConrad Meyer "memory"); 1358207def1SConrad Meyer } 1368207def1SConrad Meyer 137df013409SKonstantin Belousov static __inline void 138df013409SKonstantin Belousov xsaveopt64(char *addr, uint64_t mask) 139df013409SKonstantin Belousov { 140df013409SKonstantin Belousov uint32_t low, hi; 141df013409SKonstantin Belousov 142df013409SKonstantin Belousov low = mask; 143df013409SKonstantin Belousov hi = mask >> 32; 144df013409SKonstantin Belousov __asm __volatile("xsaveopt64 %0" : "=m" (*addr) : "a" (low), "d" (hi) : 145df013409SKonstantin Belousov "memory"); 146df013409SKonstantin Belousov } 147df013409SKonstantin Belousov 148cf4e1c46SPeter Wemm #else /* !(__GNUCLIKE_ASM && !lint) */ 1495b81b6b3SRodney W. 
Grimes 15017275403SJung-uk Kim void fldcw(u_short cw); 15189c9a483SAlfred Perlstein void fnclex(void); 15289c9a483SAlfred Perlstein void fninit(void); 15389c9a483SAlfred Perlstein void fnstcw(caddr_t addr); 15489c9a483SAlfred Perlstein void fnstsw(caddr_t addr); 15589c9a483SAlfred Perlstein void fxsave(caddr_t addr); 15689c9a483SAlfred Perlstein void fxrstor(caddr_t addr); 15707c86dcfSJung-uk Kim void ldmxcsr(u_int csr); 158a42fa0afSKonstantin Belousov void stmxcsr(u_int *csr); 159df013409SKonstantin Belousov void xrstor32(char *addr, uint64_t mask); 160df013409SKonstantin Belousov void xrstor64(char *addr, uint64_t mask); 161df013409SKonstantin Belousov void xsave32(char *addr, uint64_t mask); 162df013409SKonstantin Belousov void xsave64(char *addr, uint64_t mask); 163df013409SKonstantin Belousov void xsaveopt32(char *addr, uint64_t mask); 164df013409SKonstantin Belousov void xsaveopt64(char *addr, uint64_t mask); 1655b81b6b3SRodney W. Grimes 166cf4e1c46SPeter Wemm #endif /* __GNUCLIKE_ASM && !lint */ 1675b81b6b3SRodney W. Grimes 168d706ec29SJohn Baldwin #define start_emulating() load_cr0(rcr0() | CR0_TS) 169d706ec29SJohn Baldwin #define stop_emulating() clts() 170d706ec29SJohn Baldwin 1718c6f8f3dSKonstantin Belousov CTASSERT(sizeof(struct savefpu) == 512); 1728c6f8f3dSKonstantin Belousov CTASSERT(sizeof(struct xstate_hdr) == 64); 1738c6f8f3dSKonstantin Belousov CTASSERT(sizeof(struct savefpu_ymm) == 832); 1748c6f8f3dSKonstantin Belousov 1758c6f8f3dSKonstantin Belousov /* 1768c6f8f3dSKonstantin Belousov * This requirement is to make it easier for asm code to calculate 1778c6f8f3dSKonstantin Belousov * offset of the fpu save area from the pcb address. FPU save area 178b74a2290SKonstantin Belousov * must be 64-byte aligned. 1798c6f8f3dSKonstantin Belousov */ 1808c6f8f3dSKonstantin Belousov CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); 1815b81b6b3SRodney W. 
Grimes 182180e57e5SJohn Baldwin /* 183180e57e5SJohn Baldwin * Ensure the copy of XCR0 saved in a core is contained in the padding 184180e57e5SJohn Baldwin * area. 185180e57e5SJohn Baldwin */ 186180e57e5SJohn Baldwin CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savefpu, sv_pad) && 187180e57e5SJohn Baldwin X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savefpu)); 188180e57e5SJohn Baldwin 1892652af56SColin Percival static void fpu_clean_state(void); 1902652af56SColin Percival 1910b7dc0a7SJohn Baldwin SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, 192f0188618SHans Petter Selasky SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware"); 1933a34a5c3SPoul-Henning Kamp 1948c6f8f3dSKonstantin Belousov int use_xsave; /* non-static for cpu_switch.S */ 1958c6f8f3dSKonstantin Belousov uint64_t xsave_mask; /* the same */ 1962741efecSPeter Grehan static uma_zone_t fpu_save_area_zone; 1978c6f8f3dSKonstantin Belousov static struct savefpu *fpu_initialstate; 1988c6f8f3dSKonstantin Belousov 199701acc2fSEric van Gyzen static struct xsave_area_elm_descr { 200333d0c60SKonstantin Belousov u_int offset; 201333d0c60SKonstantin Belousov u_int size; 202333d0c60SKonstantin Belousov } *xsave_area_desc; 203333d0c60SKonstantin Belousov 2048b4fc8b1SKonstantin Belousov static void 205df013409SKonstantin Belousov fpusave_xsaveopt64(void *addr) 2068207def1SConrad Meyer { 207df013409SKonstantin Belousov xsaveopt64((char *)addr, xsave_mask); 2088207def1SConrad Meyer } 2098207def1SConrad Meyer 2108207def1SConrad Meyer static void 211df013409SKonstantin Belousov fpusave_xsaveopt3264(void *addr) 2128c6f8f3dSKonstantin Belousov { 213df013409SKonstantin Belousov if (SV_CURPROC_FLAG(SV_ILP32)) 214df013409SKonstantin Belousov xsaveopt32((char *)addr, xsave_mask); 215df013409SKonstantin Belousov else 216df013409SKonstantin Belousov xsaveopt64((char *)addr, xsave_mask); 2178b4fc8b1SKonstantin Belousov } 2188b4fc8b1SKonstantin Belousov 2198b4fc8b1SKonstantin 
Belousov static void 220df013409SKonstantin Belousov fpusave_xsave64(void *addr) 2218b4fc8b1SKonstantin Belousov { 222df013409SKonstantin Belousov xsave64((char *)addr, xsave_mask); 223df013409SKonstantin Belousov } 2248b4fc8b1SKonstantin Belousov 225df013409SKonstantin Belousov static void 226df013409SKonstantin Belousov fpusave_xsave3264(void *addr) 227df013409SKonstantin Belousov { 228df013409SKonstantin Belousov if (SV_CURPROC_FLAG(SV_ILP32)) 229df013409SKonstantin Belousov xsave32((char *)addr, xsave_mask); 230df013409SKonstantin Belousov else 231df013409SKonstantin Belousov xsave64((char *)addr, xsave_mask); 232df013409SKonstantin Belousov } 233df013409SKonstantin Belousov 234df013409SKonstantin Belousov static void 235df013409SKonstantin Belousov fpurestore_xrstor64(void *addr) 236df013409SKonstantin Belousov { 237df013409SKonstantin Belousov xrstor64((char *)addr, xsave_mask); 238df013409SKonstantin Belousov } 239df013409SKonstantin Belousov 240df013409SKonstantin Belousov static void 241df013409SKonstantin Belousov fpurestore_xrstor3264(void *addr) 242df013409SKonstantin Belousov { 243df013409SKonstantin Belousov if (SV_CURPROC_FLAG(SV_ILP32)) 244df013409SKonstantin Belousov xrstor32((char *)addr, xsave_mask); 245df013409SKonstantin Belousov else 246df013409SKonstantin Belousov xrstor64((char *)addr, xsave_mask); 2478b4fc8b1SKonstantin Belousov } 2488b4fc8b1SKonstantin Belousov 2498b4fc8b1SKonstantin Belousov static void 2508b4fc8b1SKonstantin Belousov fpusave_fxsave(void *addr) 2518b4fc8b1SKonstantin Belousov { 2528b4fc8b1SKonstantin Belousov 2538c6f8f3dSKonstantin Belousov fxsave((char *)addr); 2548c6f8f3dSKonstantin Belousov } 2558c6f8f3dSKonstantin Belousov 2568b4fc8b1SKonstantin Belousov static void 2578b4fc8b1SKonstantin Belousov fpurestore_fxrstor(void *addr) 2588b4fc8b1SKonstantin Belousov { 2598b4fc8b1SKonstantin Belousov 2608b4fc8b1SKonstantin Belousov fxrstor((char *)addr); 2618b4fc8b1SKonstantin Belousov } 2628b4fc8b1SKonstantin Belousov 
2638b4fc8b1SKonstantin Belousov static void 2648b4fc8b1SKonstantin Belousov init_xsave(void) 2658c6f8f3dSKonstantin Belousov { 2668c6f8f3dSKonstantin Belousov 2678c6f8f3dSKonstantin Belousov if (use_xsave) 2688b4fc8b1SKonstantin Belousov return; 2698b4fc8b1SKonstantin Belousov if ((cpu_feature2 & CPUID2_XSAVE) == 0) 2708b4fc8b1SKonstantin Belousov return; 2718b4fc8b1SKonstantin Belousov use_xsave = 1; 2728b4fc8b1SKonstantin Belousov TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); 2738b4fc8b1SKonstantin Belousov } 2748b4fc8b1SKonstantin Belousov 2757c5a46a1SKonstantin Belousov DEFINE_IFUNC(, void, fpusave, (void *)) 2768b4fc8b1SKonstantin Belousov { 2778b4fc8b1SKonstantin Belousov 2788b4fc8b1SKonstantin Belousov init_xsave(); 279df013409SKonstantin Belousov if (!use_xsave) 2808207def1SConrad Meyer return (fpusave_fxsave); 281df013409SKonstantin Belousov if ((cpu_stdext_feature & CPUID_EXTSTATE_XSAVEOPT) != 0) { 282df013409SKonstantin Belousov return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ? 283df013409SKonstantin Belousov fpusave_xsaveopt64 : fpusave_xsaveopt3264); 284df013409SKonstantin Belousov } 285df013409SKonstantin Belousov return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ? 286df013409SKonstantin Belousov fpusave_xsave64 : fpusave_xsave3264); 2878b4fc8b1SKonstantin Belousov } 2888b4fc8b1SKonstantin Belousov 2897c5a46a1SKonstantin Belousov DEFINE_IFUNC(, void, fpurestore, (void *)) 2908b4fc8b1SKonstantin Belousov { 2918b4fc8b1SKonstantin Belousov 2928b4fc8b1SKonstantin Belousov init_xsave(); 293df013409SKonstantin Belousov if (!use_xsave) 294df013409SKonstantin Belousov return (fpurestore_fxrstor); 295df013409SKonstantin Belousov return ((cpu_stdext_feature & CPUID_STDEXT_NFPUSG) != 0 ? 
296df013409SKonstantin Belousov fpurestore_xrstor64 : fpurestore_xrstor3264); 2978c6f8f3dSKonstantin Belousov } 2983902c3efSSteve Passe 2991d22d877SJung-uk Kim void 3001d22d877SJung-uk Kim fpususpend(void *addr) 3011d22d877SJung-uk Kim { 3021d22d877SJung-uk Kim u_long cr0; 3031d22d877SJung-uk Kim 3041d22d877SJung-uk Kim cr0 = rcr0(); 3051d22d877SJung-uk Kim stop_emulating(); 3061d22d877SJung-uk Kim fpusave(addr); 3071d22d877SJung-uk Kim load_cr0(cr0); 3081d22d877SJung-uk Kim } 3091d22d877SJung-uk Kim 310b1d735baSJohn Baldwin void 311b1d735baSJohn Baldwin fpuresume(void *addr) 312b1d735baSJohn Baldwin { 313b1d735baSJohn Baldwin u_long cr0; 314b1d735baSJohn Baldwin 315b1d735baSJohn Baldwin cr0 = rcr0(); 316b1d735baSJohn Baldwin stop_emulating(); 317b1d735baSJohn Baldwin fninit(); 318b1d735baSJohn Baldwin if (use_xsave) 319b1d735baSJohn Baldwin load_xcr(XCR0, xsave_mask); 320b1d735baSJohn Baldwin fpurestore(addr); 321b1d735baSJohn Baldwin load_cr0(cr0); 322b1d735baSJohn Baldwin } 323b1d735baSJohn Baldwin 3245b81b6b3SRodney W. Grimes /* 3258c6f8f3dSKonstantin Belousov * Enable XSAVE if supported and allowed by user. 3268c6f8f3dSKonstantin Belousov * Calculate the xsave_mask. 
3278c6f8f3dSKonstantin Belousov */ 3288c6f8f3dSKonstantin Belousov static void 3298c6f8f3dSKonstantin Belousov fpuinit_bsp1(void) 3308c6f8f3dSKonstantin Belousov { 3318c6f8f3dSKonstantin Belousov u_int cp[4]; 3328c6f8f3dSKonstantin Belousov uint64_t xsave_mask_user; 3339cffc92cSKonstantin Belousov bool old_wp; 3348c6f8f3dSKonstantin Belousov 3358c6f8f3dSKonstantin Belousov if (!use_xsave) 3368c6f8f3dSKonstantin Belousov return; 3378c6f8f3dSKonstantin Belousov cpuid_count(0xd, 0x0, cp); 3388c6f8f3dSKonstantin Belousov xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; 3398c6f8f3dSKonstantin Belousov if ((cp[0] & xsave_mask) != xsave_mask) 3408c6f8f3dSKonstantin Belousov panic("CPU0 does not support X87 or SSE: %x", cp[0]); 3418c6f8f3dSKonstantin Belousov xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; 3428c6f8f3dSKonstantin Belousov xsave_mask_user = xsave_mask; 3438c6f8f3dSKonstantin Belousov TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); 3448c6f8f3dSKonstantin Belousov xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; 3458c6f8f3dSKonstantin Belousov xsave_mask &= xsave_mask_user; 3460eb7ae8dSJohn Baldwin if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) 3470eb7ae8dSJohn Baldwin xsave_mask &= ~XFEATURE_AVX512; 3480eb7ae8dSJohn Baldwin if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) 3490eb7ae8dSJohn Baldwin xsave_mask &= ~XFEATURE_MPX; 350333d0c60SKonstantin Belousov 351333d0c60SKonstantin Belousov cpuid_count(0xd, 0x1, cp); 352333d0c60SKonstantin Belousov if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) { 353333d0c60SKonstantin Belousov /* 354333d0c60SKonstantin Belousov * Patch the XSAVE instruction in the cpu_switch code 355333d0c60SKonstantin Belousov * to XSAVEOPT. We assume that XSAVE encoding used 356333d0c60SKonstantin Belousov * REX byte, and set the bit 4 of the r/m byte. 
3579cffc92cSKonstantin Belousov * 3589cffc92cSKonstantin Belousov * It seems that some BIOSes give control to the OS 3599cffc92cSKonstantin Belousov * with CR0.WP already set, making the kernel text 3609cffc92cSKonstantin Belousov * read-only before cpu_startup(). 361333d0c60SKonstantin Belousov */ 3629cffc92cSKonstantin Belousov old_wp = disable_wp(); 363df013409SKonstantin Belousov ctx_switch_xsave32[3] |= 0x10; 364333d0c60SKonstantin Belousov ctx_switch_xsave[3] |= 0x10; 3659cffc92cSKonstantin Belousov restore_wp(old_wp); 366333d0c60SKonstantin Belousov } 3678c6f8f3dSKonstantin Belousov } 3688c6f8f3dSKonstantin Belousov 3698c6f8f3dSKonstantin Belousov /* 3708c6f8f3dSKonstantin Belousov * Calculate the fpu save area size. 3718c6f8f3dSKonstantin Belousov */ 3728c6f8f3dSKonstantin Belousov static void 3738c6f8f3dSKonstantin Belousov fpuinit_bsp2(void) 3748c6f8f3dSKonstantin Belousov { 3758c6f8f3dSKonstantin Belousov u_int cp[4]; 3768c6f8f3dSKonstantin Belousov 3778c6f8f3dSKonstantin Belousov if (use_xsave) { 3788c6f8f3dSKonstantin Belousov cpuid_count(0xd, 0x0, cp); 3798c6f8f3dSKonstantin Belousov cpu_max_ext_state_size = cp[1]; 3808c6f8f3dSKonstantin Belousov 3818c6f8f3dSKonstantin Belousov /* 3828c6f8f3dSKonstantin Belousov * Reload the cpu_feature2, since we enabled OSXSAVE. 3838c6f8f3dSKonstantin Belousov */ 3848c6f8f3dSKonstantin Belousov do_cpuid(1, cp); 3858c6f8f3dSKonstantin Belousov cpu_feature2 = cp[2]; 3868c6f8f3dSKonstantin Belousov } else 3878c6f8f3dSKonstantin Belousov cpu_max_ext_state_size = sizeof(struct savefpu); 3888c6f8f3dSKonstantin Belousov } 3898c6f8f3dSKonstantin Belousov 3908c6f8f3dSKonstantin Belousov /* 3918c6f8f3dSKonstantin Belousov * Initialize the floating point unit. 
392da4113b3SPeter Wemm */ 393398dbb11SPeter Wemm void 3941c89210cSPeter Wemm fpuinit(void) 395da4113b3SPeter Wemm { 3960689bdccSJohn Baldwin register_t saveintr; 39796a7759eSPeter Wemm u_int mxcsr; 398398dbb11SPeter Wemm u_short control; 399da4113b3SPeter Wemm 4008c6f8f3dSKonstantin Belousov if (IS_BSP()) 4018c6f8f3dSKonstantin Belousov fpuinit_bsp1(); 4028c6f8f3dSKonstantin Belousov 4038c6f8f3dSKonstantin Belousov if (use_xsave) { 4048c6f8f3dSKonstantin Belousov load_cr4(rcr4() | CR4_XSAVE); 4057574a595SJohn Baldwin load_xcr(XCR0, xsave_mask); 4068c6f8f3dSKonstantin Belousov } 4078c6f8f3dSKonstantin Belousov 4088c6f8f3dSKonstantin Belousov /* 4098c6f8f3dSKonstantin Belousov * XCR0 shall be set up before CPU can report the save area size. 4108c6f8f3dSKonstantin Belousov */ 4118c6f8f3dSKonstantin Belousov if (IS_BSP()) 4128c6f8f3dSKonstantin Belousov fpuinit_bsp2(); 4138c6f8f3dSKonstantin Belousov 41499753495SKonstantin Belousov /* 41599753495SKonstantin Belousov * It is too early for critical_enter() to work on AP. 41699753495SKonstantin Belousov */ 4170689bdccSJohn Baldwin saveintr = intr_disable(); 4185b81b6b3SRodney W. Grimes stop_emulating(); 4195b81b6b3SRodney W. Grimes fninit(); 420398dbb11SPeter Wemm control = __INITIAL_FPUCW__; 42117275403SJung-uk Kim fldcw(control); 42296a7759eSPeter Wemm mxcsr = __INITIAL_MXCSR__; 42396a7759eSPeter Wemm ldmxcsr(mxcsr); 424a8346a98SJohn Baldwin start_emulating(); 4250689bdccSJohn Baldwin intr_restore(saveintr); 4265b81b6b3SRodney W. Grimes } 4275b81b6b3SRodney W. Grimes 4285b81b6b3SRodney W. Grimes /* 4298c6f8f3dSKonstantin Belousov * On the boot CPU we generate a clean state that is used to 4308c6f8f3dSKonstantin Belousov * initialize the floating point unit when it is first used by a 4318c6f8f3dSKonstantin Belousov * process. 
4328c6f8f3dSKonstantin Belousov */ 4338c6f8f3dSKonstantin Belousov static void 4348c6f8f3dSKonstantin Belousov fpuinitstate(void *arg __unused) 4358c6f8f3dSKonstantin Belousov { 436fdfe249bSKonstantin Belousov uint64_t *xstate_bv; 4378c6f8f3dSKonstantin Belousov register_t saveintr; 438333d0c60SKonstantin Belousov int cp[4], i, max_ext_n; 4398c6f8f3dSKonstantin Belousov 440674cbe79SEric van Gyzen /* Do potentially blocking operations before disabling interrupts. */ 441674cbe79SEric van Gyzen fpu_save_area_zone = uma_zcreate("FPU_save_area", 442674cbe79SEric van Gyzen cpu_max_ext_state_size, NULL, NULL, NULL, NULL, 443674cbe79SEric van Gyzen XSAVE_AREA_ALIGN - 1, 0); 4446fba90f2SEric van Gyzen fpu_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO); 445674cbe79SEric van Gyzen if (use_xsave) { 446674cbe79SEric van Gyzen max_ext_n = flsl(xsave_mask); 447674cbe79SEric van Gyzen xsave_area_desc = malloc(max_ext_n * sizeof(struct 448674cbe79SEric van Gyzen xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); 449674cbe79SEric van Gyzen } 450674cbe79SEric van Gyzen 451df8dd602SKonstantin Belousov cpu_thread_alloc(&thread0); 452df8dd602SKonstantin Belousov 4538c6f8f3dSKonstantin Belousov saveintr = intr_disable(); 4548c6f8f3dSKonstantin Belousov stop_emulating(); 4558c6f8f3dSKonstantin Belousov 4568207def1SConrad Meyer fpusave_fxsave(fpu_initialstate); 4578c6f8f3dSKonstantin Belousov if (fpu_initialstate->sv_env.en_mxcsr_mask) 4588c6f8f3dSKonstantin Belousov cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask; 4598c6f8f3dSKonstantin Belousov else 4608c6f8f3dSKonstantin Belousov cpu_mxcsr_mask = 0xFFBF; 4618c6f8f3dSKonstantin Belousov 4628c6f8f3dSKonstantin Belousov /* 463b57e6814SKonstantin Belousov * The fninit instruction does not modify XMM registers or x87 464b57e6814SKonstantin Belousov * registers (MM/ST). 
The fpusave call dumped the garbage 465b57e6814SKonstantin Belousov * contained in the registers after reset to the initial state 466b57e6814SKonstantin Belousov * saved. Clear XMM and x87 registers file image to make the 467b57e6814SKonstantin Belousov * startup program state and signal handler XMM/x87 register 468b57e6814SKonstantin Belousov * content predictable. 4698c6f8f3dSKonstantin Belousov */ 470b57e6814SKonstantin Belousov bzero(fpu_initialstate->sv_fp, sizeof(fpu_initialstate->sv_fp)); 471b57e6814SKonstantin Belousov bzero(fpu_initialstate->sv_xmm, sizeof(fpu_initialstate->sv_xmm)); 4728c6f8f3dSKonstantin Belousov 473333d0c60SKonstantin Belousov /* 474333d0c60SKonstantin Belousov * Create a table describing the layout of the CPU Extended 475333d0c60SKonstantin Belousov * Save Area. 476333d0c60SKonstantin Belousov */ 47714f52559SKonstantin Belousov if (use_xsave) { 478fdfe249bSKonstantin Belousov xstate_bv = (uint64_t *)((char *)(fpu_initialstate + 1) + 479fdfe249bSKonstantin Belousov offsetof(struct xstate_hdr, xstate_bv)); 480fdfe249bSKonstantin Belousov *xstate_bv = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; 481fdfe249bSKonstantin Belousov 482333d0c60SKonstantin Belousov /* x87 state */ 483333d0c60SKonstantin Belousov xsave_area_desc[0].offset = 0; 484333d0c60SKonstantin Belousov xsave_area_desc[0].size = 160; 485333d0c60SKonstantin Belousov /* XMM */ 486333d0c60SKonstantin Belousov xsave_area_desc[1].offset = 160; 487*73b357beSKonstantin Belousov xsave_area_desc[1].size = 416 - 160; 488333d0c60SKonstantin Belousov 489333d0c60SKonstantin Belousov for (i = 2; i < max_ext_n; i++) { 490333d0c60SKonstantin Belousov cpuid_count(0xd, i, cp); 491333d0c60SKonstantin Belousov xsave_area_desc[i].offset = cp[1]; 492333d0c60SKonstantin Belousov xsave_area_desc[i].size = cp[0]; 493333d0c60SKonstantin Belousov } 494333d0c60SKonstantin Belousov } 495333d0c60SKonstantin Belousov 4968c6f8f3dSKonstantin Belousov start_emulating(); 4978c6f8f3dSKonstantin Belousov 
intr_restore(saveintr); 4988c6f8f3dSKonstantin Belousov } 499ad456dd9SKyle Evans /* EFIRT needs this to be initialized before we can enter our EFI environment */ 500c56de177SKonstantin Belousov SYSINIT(fpuinitstate, SI_SUB_CPU, SI_ORDER_ANY, fpuinitstate, NULL); 5018c6f8f3dSKonstantin Belousov 5028c6f8f3dSKonstantin Belousov /* 5035b81b6b3SRodney W. Grimes * Free coprocessor (if we have it). 5045b81b6b3SRodney W. Grimes */ 5055b81b6b3SRodney W. Grimes void 506bf2f09eeSPeter Wemm fpuexit(struct thread *td) 5075b81b6b3SRodney W. Grimes { 5085b81b6b3SRodney W. Grimes 50999753495SKonstantin Belousov critical_enter(); 5101c89210cSPeter Wemm if (curthread == PCPU_GET(fpcurthread)) { 5111c89210cSPeter Wemm stop_emulating(); 51283b22b05SKonstantin Belousov fpusave(curpcb->pcb_save); 5131c89210cSPeter Wemm start_emulating(); 5146dfc9e44SKonstantin Belousov PCPU_SET(fpcurthread, NULL); 5151c89210cSPeter Wemm } 51699753495SKonstantin Belousov critical_exit(); 5175b81b6b3SRodney W. Grimes } 5185b81b6b3SRodney W. Grimes 51930abe507SJonathan Mini int 520f132cd05SKonstantin Belousov fpuformat(void) 52130abe507SJonathan Mini { 52230abe507SJonathan Mini 52330abe507SJonathan Mini return (_MC_FPFMT_XMM); 52430abe507SJonathan Mini } 52530abe507SJonathan Mini 5265b81b6b3SRodney W. Grimes /* 527a7674320SMartin Cracauer * The following mechanism is used to ensure that the FPE_... value 528a7674320SMartin Cracauer * that is passed as a trapcode to the signal handler of the user 529a7674320SMartin Cracauer * process does not have more than one bit set. 530a7674320SMartin Cracauer * 531a7674320SMartin Cracauer * Multiple bits may be set if the user process modifies the control 532a7674320SMartin Cracauer * word while a status word bit is already set. 
 * While this is a sign of bad coding, we have no choice but to narrow
 * them down to one bit, since we must not send a trapcode that is not
 * exactly one of the FPE_ macros.
 *
 * The mechanism has a static table with 128 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The macro to choose one of these values does these steps: 1) Throw
 * away status word bits that cannot be masked.  2) Throw away the bits
 * currently masked in the control word, assuming the user isn't
 * interested in them anymore.  3) Reinsert status word bit 7 (stack
 * fault) if it is set, which cannot be masked but must be preserved.
 * 4) Use the remaining bits to point into the trapcode table.
551a7674320SMartin Cracauer * 552a7674320SMartin Cracauer * The 6 maskable bits in order of their preference, as stated in the 553a7674320SMartin Cracauer * above referenced Intel manual: 554a7674320SMartin Cracauer * 1 Invalid operation (FP_X_INV) 555a7674320SMartin Cracauer * 1a Stack underflow 556a7674320SMartin Cracauer * 1b Stack overflow 557a7674320SMartin Cracauer * 1c Operand of unsupported format 558a7674320SMartin Cracauer * 1d SNaN operand. 559a7674320SMartin Cracauer * 2 QNaN operand (not an exception, irrelavant here) 560a7674320SMartin Cracauer * 3 Any other invalid-operation not mentioned above or zero divide 561a7674320SMartin Cracauer * (FP_X_INV, FP_X_DZ) 562a7674320SMartin Cracauer * 4 Denormal operand (FP_X_DNML) 563a7674320SMartin Cracauer * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) 564784648c6SMartin Cracauer * 6 Inexact result (FP_X_IMP) 565784648c6SMartin Cracauer */ 566a7674320SMartin Cracauer static char fpetable[128] = { 567a7674320SMartin Cracauer 0, 568a7674320SMartin Cracauer FPE_FLTINV, /* 1 - INV */ 569a7674320SMartin Cracauer FPE_FLTUND, /* 2 - DNML */ 570a7674320SMartin Cracauer FPE_FLTINV, /* 3 - INV | DNML */ 571a7674320SMartin Cracauer FPE_FLTDIV, /* 4 - DZ */ 572a7674320SMartin Cracauer FPE_FLTINV, /* 5 - INV | DZ */ 573a7674320SMartin Cracauer FPE_FLTDIV, /* 6 - DNML | DZ */ 574a7674320SMartin Cracauer FPE_FLTINV, /* 7 - INV | DNML | DZ */ 575a7674320SMartin Cracauer FPE_FLTOVF, /* 8 - OFL */ 576a7674320SMartin Cracauer FPE_FLTINV, /* 9 - INV | OFL */ 577a7674320SMartin Cracauer FPE_FLTUND, /* A - DNML | OFL */ 578a7674320SMartin Cracauer FPE_FLTINV, /* B - INV | DNML | OFL */ 579a7674320SMartin Cracauer FPE_FLTDIV, /* C - DZ | OFL */ 580a7674320SMartin Cracauer FPE_FLTINV, /* D - INV | DZ | OFL */ 581a7674320SMartin Cracauer FPE_FLTDIV, /* E - DNML | DZ | OFL */ 582a7674320SMartin Cracauer FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ 583a7674320SMartin Cracauer FPE_FLTUND, /* 10 - UFL */ 584a7674320SMartin Cracauer 
FPE_FLTINV, /* 11 - INV | UFL */ 585a7674320SMartin Cracauer FPE_FLTUND, /* 12 - DNML | UFL */ 586a7674320SMartin Cracauer FPE_FLTINV, /* 13 - INV | DNML | UFL */ 587a7674320SMartin Cracauer FPE_FLTDIV, /* 14 - DZ | UFL */ 588a7674320SMartin Cracauer FPE_FLTINV, /* 15 - INV | DZ | UFL */ 589a7674320SMartin Cracauer FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ 590a7674320SMartin Cracauer FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ 591a7674320SMartin Cracauer FPE_FLTOVF, /* 18 - OFL | UFL */ 592a7674320SMartin Cracauer FPE_FLTINV, /* 19 - INV | OFL | UFL */ 593a7674320SMartin Cracauer FPE_FLTUND, /* 1A - DNML | OFL | UFL */ 594a7674320SMartin Cracauer FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ 595a7674320SMartin Cracauer FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ 596a7674320SMartin Cracauer FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ 597a7674320SMartin Cracauer FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ 598a7674320SMartin Cracauer FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ 599a7674320SMartin Cracauer FPE_FLTRES, /* 20 - IMP */ 600a7674320SMartin Cracauer FPE_FLTINV, /* 21 - INV | IMP */ 601a7674320SMartin Cracauer FPE_FLTUND, /* 22 - DNML | IMP */ 602a7674320SMartin Cracauer FPE_FLTINV, /* 23 - INV | DNML | IMP */ 603a7674320SMartin Cracauer FPE_FLTDIV, /* 24 - DZ | IMP */ 604a7674320SMartin Cracauer FPE_FLTINV, /* 25 - INV | DZ | IMP */ 605a7674320SMartin Cracauer FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ 606a7674320SMartin Cracauer FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ 607a7674320SMartin Cracauer FPE_FLTOVF, /* 28 - OFL | IMP */ 608a7674320SMartin Cracauer FPE_FLTINV, /* 29 - INV | OFL | IMP */ 609a7674320SMartin Cracauer FPE_FLTUND, /* 2A - DNML | OFL | IMP */ 610a7674320SMartin Cracauer FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ 611a7674320SMartin Cracauer FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ 612a7674320SMartin Cracauer FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ 613a7674320SMartin Cracauer FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ 
614a7674320SMartin Cracauer FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ 615a7674320SMartin Cracauer FPE_FLTUND, /* 30 - UFL | IMP */ 616a7674320SMartin Cracauer FPE_FLTINV, /* 31 - INV | UFL | IMP */ 617a7674320SMartin Cracauer FPE_FLTUND, /* 32 - DNML | UFL | IMP */ 618a7674320SMartin Cracauer FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ 619a7674320SMartin Cracauer FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ 620a7674320SMartin Cracauer FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ 621a7674320SMartin Cracauer FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ 622a7674320SMartin Cracauer FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ 623a7674320SMartin Cracauer FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ 624a7674320SMartin Cracauer FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ 625a7674320SMartin Cracauer FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ 626a7674320SMartin Cracauer FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ 627a7674320SMartin Cracauer FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ 628a7674320SMartin Cracauer FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ 629a7674320SMartin Cracauer FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ 630a7674320SMartin Cracauer FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ 631a7674320SMartin Cracauer FPE_FLTSUB, /* 40 - STK */ 632a7674320SMartin Cracauer FPE_FLTSUB, /* 41 - INV | STK */ 633a7674320SMartin Cracauer FPE_FLTUND, /* 42 - DNML | STK */ 634a7674320SMartin Cracauer FPE_FLTSUB, /* 43 - INV | DNML | STK */ 635a7674320SMartin Cracauer FPE_FLTDIV, /* 44 - DZ | STK */ 636a7674320SMartin Cracauer FPE_FLTSUB, /* 45 - INV | DZ | STK */ 637a7674320SMartin Cracauer FPE_FLTDIV, /* 46 - DNML | DZ | STK */ 638a7674320SMartin Cracauer FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ 639a7674320SMartin Cracauer FPE_FLTOVF, /* 48 - OFL | STK */ 640a7674320SMartin Cracauer FPE_FLTSUB, /* 49 - INV | OFL | STK */ 641a7674320SMartin Cracauer FPE_FLTUND, /* 4A - DNML | OFL | STK */ 642a7674320SMartin Cracauer FPE_FLTSUB, /* 
4B - INV | DNML | OFL | STK */ 643a7674320SMartin Cracauer FPE_FLTDIV, /* 4C - DZ | OFL | STK */ 644a7674320SMartin Cracauer FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ 645a7674320SMartin Cracauer FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ 646a7674320SMartin Cracauer FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ 647a7674320SMartin Cracauer FPE_FLTUND, /* 50 - UFL | STK */ 648a7674320SMartin Cracauer FPE_FLTSUB, /* 51 - INV | UFL | STK */ 649a7674320SMartin Cracauer FPE_FLTUND, /* 52 - DNML | UFL | STK */ 650a7674320SMartin Cracauer FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ 651a7674320SMartin Cracauer FPE_FLTDIV, /* 54 - DZ | UFL | STK */ 652a7674320SMartin Cracauer FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ 653a7674320SMartin Cracauer FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ 654a7674320SMartin Cracauer FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ 655a7674320SMartin Cracauer FPE_FLTOVF, /* 58 - OFL | UFL | STK */ 656a7674320SMartin Cracauer FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ 657a7674320SMartin Cracauer FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ 658a7674320SMartin Cracauer FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ 659a7674320SMartin Cracauer FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ 660a7674320SMartin Cracauer FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ 661a7674320SMartin Cracauer FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ 662a7674320SMartin Cracauer FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ 663a7674320SMartin Cracauer FPE_FLTRES, /* 60 - IMP | STK */ 664a7674320SMartin Cracauer FPE_FLTSUB, /* 61 - INV | IMP | STK */ 665a7674320SMartin Cracauer FPE_FLTUND, /* 62 - DNML | IMP | STK */ 666a7674320SMartin Cracauer FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ 667a7674320SMartin Cracauer FPE_FLTDIV, /* 64 - DZ | IMP | STK */ 668a7674320SMartin Cracauer FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ 669a7674320SMartin Cracauer FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ 670a7674320SMartin Cracauer 
FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ 671a7674320SMartin Cracauer FPE_FLTOVF, /* 68 - OFL | IMP | STK */ 672a7674320SMartin Cracauer FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ 673a7674320SMartin Cracauer FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ 674a7674320SMartin Cracauer FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ 675a7674320SMartin Cracauer FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ 676a7674320SMartin Cracauer FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ 677a7674320SMartin Cracauer FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ 678a7674320SMartin Cracauer FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ 679a7674320SMartin Cracauer FPE_FLTUND, /* 70 - UFL | IMP | STK */ 680a7674320SMartin Cracauer FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ 681a7674320SMartin Cracauer FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ 682a7674320SMartin Cracauer FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ 683a7674320SMartin Cracauer FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ 684a7674320SMartin Cracauer FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ 685a7674320SMartin Cracauer FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ 686a7674320SMartin Cracauer FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ 687a7674320SMartin Cracauer FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ 688a7674320SMartin Cracauer FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ 689a7674320SMartin Cracauer FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ 690a7674320SMartin Cracauer FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ 691a7674320SMartin Cracauer FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ 692a7674320SMartin Cracauer FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ 693a7674320SMartin Cracauer FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ 694a7674320SMartin Cracauer FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ 695a7674320SMartin Cracauer }; 696a7674320SMartin Cracauer 
697a7674320SMartin Cracauer /* 698dfa8a512SKonstantin Belousov * Read the FP status and control words, then generate si_code value 699dfa8a512SKonstantin Belousov * for SIGFPE. The error code chosen will be one of the 700dfa8a512SKonstantin Belousov * FPE_... macros. It will be sent as the second argument to old 701dfa8a512SKonstantin Belousov * BSD-style signal handlers and as "siginfo_t->si_code" (second 702dfa8a512SKonstantin Belousov * argument) to SA_SIGINFO signal handlers. 7035b81b6b3SRodney W. Grimes * 704dfa8a512SKonstantin Belousov * Some time ago, we cleared the x87 exceptions with FNCLEX there. 705dfa8a512SKonstantin Belousov * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The 706dfa8a512SKonstantin Belousov * usermode code which understands the FPU hardware enough to enable 707dfa8a512SKonstantin Belousov * the exceptions, can also handle clearing the exception state in the 708dfa8a512SKonstantin Belousov * handler. The only consequence of not clearing the exception is the 709dfa8a512SKonstantin Belousov * rethrow of the SIGFPE on return from the signal handler and 710dfa8a512SKonstantin Belousov * reexecution of the corresponding instruction. 711bc84db62SKonstantin Belousov * 712dfa8a512SKonstantin Belousov * For XMM traps, the exceptions were never cleared. 7135b81b6b3SRodney W. Grimes */ 7141c1771cbSBruce Evans int 715bc84db62SKonstantin Belousov fputrap_x87(void) 7165b81b6b3SRodney W. Grimes { 717bc84db62SKonstantin Belousov struct savefpu *pcb_save; 7181c1771cbSBruce Evans u_short control, status; 7195b81b6b3SRodney W. Grimes 72099753495SKonstantin Belousov critical_enter(); 7215b81b6b3SRodney W. Grimes 7225b81b6b3SRodney W. Grimes /* 7231c1771cbSBruce Evans * Interrupt handling (for another interrupt) may have pushed the 7241c1771cbSBruce Evans * state to memory. Fetch the relevant parts of the state from 7251c1771cbSBruce Evans * wherever they are. 7265b81b6b3SRodney W. 
Grimes */ 7270bbc8826SJohn Baldwin if (PCPU_GET(fpcurthread) != curthread) { 72883b22b05SKonstantin Belousov pcb_save = curpcb->pcb_save; 729bc84db62SKonstantin Belousov control = pcb_save->sv_env.en_cw; 730bc84db62SKonstantin Belousov status = pcb_save->sv_env.en_sw; 7315b81b6b3SRodney W. Grimes } else { 7321c1771cbSBruce Evans fnstcw(&control); 7331c1771cbSBruce Evans fnstsw(&status); 7345b81b6b3SRodney W. Grimes } 7351c1771cbSBruce Evans 73699753495SKonstantin Belousov critical_exit(); 7371c1771cbSBruce Evans return (fpetable[status & ((~control & 0x3f) | 0x40)]); 7385b81b6b3SRodney W. Grimes } 7395b81b6b3SRodney W. Grimes 740bc84db62SKonstantin Belousov int 741bc84db62SKonstantin Belousov fputrap_sse(void) 742bc84db62SKonstantin Belousov { 743bc84db62SKonstantin Belousov u_int mxcsr; 744bc84db62SKonstantin Belousov 745bc84db62SKonstantin Belousov critical_enter(); 746bc84db62SKonstantin Belousov if (PCPU_GET(fpcurthread) != curthread) 74783b22b05SKonstantin Belousov mxcsr = curpcb->pcb_save->sv_env.en_mxcsr; 748bc84db62SKonstantin Belousov else 749bc84db62SKonstantin Belousov stmxcsr(&mxcsr); 750bc84db62SKonstantin Belousov critical_exit(); 751bc84db62SKonstantin Belousov return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); 752bc84db62SKonstantin Belousov } 753bc84db62SKonstantin Belousov 754d1a07e31SKonstantin Belousov static void 755d1a07e31SKonstantin Belousov restore_fpu_curthread(struct thread *td) 756d1a07e31SKonstantin Belousov { 757d1a07e31SKonstantin Belousov struct pcb *pcb; 758d1a07e31SKonstantin Belousov 759d1a07e31SKonstantin Belousov /* 760d1a07e31SKonstantin Belousov * Record new context early in case frstor causes a trap. 
761d1a07e31SKonstantin Belousov */ 762d1a07e31SKonstantin Belousov PCPU_SET(fpcurthread, td); 763d1a07e31SKonstantin Belousov 764d1a07e31SKonstantin Belousov stop_emulating(); 765d1a07e31SKonstantin Belousov fpu_clean_state(); 766d1a07e31SKonstantin Belousov pcb = td->td_pcb; 767d1a07e31SKonstantin Belousov 768d1a07e31SKonstantin Belousov if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { 769d1a07e31SKonstantin Belousov /* 770d1a07e31SKonstantin Belousov * This is the first time this thread has used the FPU or 771d1a07e31SKonstantin Belousov * the PCB doesn't contain a clean FPU state. Explicitly 772d1a07e31SKonstantin Belousov * load an initial state. 773d1a07e31SKonstantin Belousov * 774d1a07e31SKonstantin Belousov * We prefer to restore the state from the actual save 775d1a07e31SKonstantin Belousov * area in PCB instead of directly loading from 776d1a07e31SKonstantin Belousov * fpu_initialstate, to ignite the XSAVEOPT 777d1a07e31SKonstantin Belousov * tracking engine. 778d1a07e31SKonstantin Belousov */ 779d1a07e31SKonstantin Belousov bcopy(fpu_initialstate, pcb->pcb_save, 780d1a07e31SKonstantin Belousov cpu_max_ext_state_size); 781d1a07e31SKonstantin Belousov fpurestore(pcb->pcb_save); 782d1a07e31SKonstantin Belousov if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) 783d1a07e31SKonstantin Belousov fldcw(pcb->pcb_initial_fpucw); 784d1a07e31SKonstantin Belousov if (PCB_USER_FPU(pcb)) 785d1a07e31SKonstantin Belousov set_pcb_flags(pcb, PCB_FPUINITDONE | 786d1a07e31SKonstantin Belousov PCB_USERFPUINITDONE); 787d1a07e31SKonstantin Belousov else 788d1a07e31SKonstantin Belousov set_pcb_flags(pcb, PCB_FPUINITDONE); 789d1a07e31SKonstantin Belousov } else 790d1a07e31SKonstantin Belousov fpurestore(pcb->pcb_save); 791d1a07e31SKonstantin Belousov } 792d1a07e31SKonstantin Belousov 7936dfc9e44SKonstantin Belousov /* 7946dfc9e44SKonstantin Belousov * Device Not Available (DNA, #NM) exception handler. 
7956dfc9e44SKonstantin Belousov * 7966dfc9e44SKonstantin Belousov * It would be better to switch FP context here (if curthread != 7976dfc9e44SKonstantin Belousov * fpcurthread) and not necessarily for every context switch, but it 7986dfc9e44SKonstantin Belousov * is too hard to access foreign pcb's. 7996dfc9e44SKonstantin Belousov */ 800a8346a98SJohn Baldwin void 801a8346a98SJohn Baldwin fpudna(void) 8025b81b6b3SRodney W. Grimes { 803d1a07e31SKonstantin Belousov struct thread *td; 80405f6ee66SJake Burkholder 805d1a07e31SKonstantin Belousov td = curthread; 806060cd4d5SKonstantin Belousov /* 807060cd4d5SKonstantin Belousov * This handler is entered with interrupts enabled, so context 808060cd4d5SKonstantin Belousov * switches may occur before critical_enter() is executed. If 809060cd4d5SKonstantin Belousov * a context switch occurs, then when we regain control, our 810060cd4d5SKonstantin Belousov * state will have been completely restored. The CPU may 811060cd4d5SKonstantin Belousov * change underneath us, but the only part of our context that 812060cd4d5SKonstantin Belousov * lives in the CPU is CR0.TS and that will be "restored" by 813060cd4d5SKonstantin Belousov * setting it on the new CPU. 814060cd4d5SKonstantin Belousov */ 81599753495SKonstantin Belousov critical_enter(); 816060cd4d5SKonstantin Belousov 817cf1c4776SKonstantin Belousov KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0, 818cf1c4776SKonstantin Belousov ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)")); 8195803d744SKonstantin Belousov if (__predict_false(PCPU_GET(fpcurthread) == td)) { 820fa7fad8aSKonstantin Belousov /* 821fa7fad8aSKonstantin Belousov * Some virtual machines seems to set %cr0.TS at 822fa7fad8aSKonstantin Belousov * arbitrary moments. Silently clear the TS bit 823fa7fad8aSKonstantin Belousov * regardless of the eager/lazy FPU context switch 824fa7fad8aSKonstantin Belousov * mode. 
825fa7fad8aSKonstantin Belousov */ 82630abe507SJonathan Mini stop_emulating(); 8275803d744SKonstantin Belousov } else { 8285803d744SKonstantin Belousov if (__predict_false(PCPU_GET(fpcurthread) != NULL)) { 8295803d744SKonstantin Belousov panic( 8305803d744SKonstantin Belousov "fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", 8315803d744SKonstantin Belousov PCPU_GET(fpcurthread), 8325803d744SKonstantin Belousov PCPU_GET(fpcurthread)->td_tid, td, td->td_tid); 8335b81b6b3SRodney W. Grimes } 834d1a07e31SKonstantin Belousov restore_fpu_curthread(td); 8355803d744SKonstantin Belousov } 83699753495SKonstantin Belousov critical_exit(); 8375b81b6b3SRodney W. Grimes } 8385b81b6b3SRodney W. Grimes 839d1a07e31SKonstantin Belousov void fpu_activate_sw(struct thread *td); /* Called from the context switch */ 840d1a07e31SKonstantin Belousov void 841d1a07e31SKonstantin Belousov fpu_activate_sw(struct thread *td) 842d1a07e31SKonstantin Belousov { 843d1a07e31SKonstantin Belousov 84499b81dcbSKonstantin Belousov if ((td->td_pflags & TDP_KTHREAD) != 0 || !PCB_USER_FPU(td->td_pcb)) { 845d1a07e31SKonstantin Belousov PCPU_SET(fpcurthread, NULL); 846d1a07e31SKonstantin Belousov start_emulating(); 847d1a07e31SKonstantin Belousov } else if (PCPU_GET(fpcurthread) != td) { 848d1a07e31SKonstantin Belousov restore_fpu_curthread(td); 849d1a07e31SKonstantin Belousov } 850d1a07e31SKonstantin Belousov } 851d1a07e31SKonstantin Belousov 85230abe507SJonathan Mini void 853f132cd05SKonstantin Belousov fpudrop(void) 85430abe507SJonathan Mini { 85530abe507SJonathan Mini struct thread *td; 85630abe507SJonathan Mini 85730abe507SJonathan Mini td = PCPU_GET(fpcurthread); 85899753495SKonstantin Belousov KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); 8594a23ecc7SKonstantin Belousov CRITICAL_ASSERT(td); 86030abe507SJonathan Mini PCPU_SET(fpcurthread, NULL); 861e6c006d9SJung-uk Kim clear_pcb_flags(td->td_pcb, PCB_FPUINITDONE); 86230abe507SJonathan Mini start_emulating(); 86330abe507SJonathan 
Mini } 86430abe507SJonathan Mini 86530abe507SJonathan Mini /* 8665c6eb037SKonstantin Belousov * Get the user state of the FPU into pcb->pcb_user_save without 8675c6eb037SKonstantin Belousov * dropping ownership (if possible). It returns the FPU ownership 8685c6eb037SKonstantin Belousov * status. 86930abe507SJonathan Mini */ 87030abe507SJonathan Mini int 8715c6eb037SKonstantin Belousov fpugetregs(struct thread *td) 8726cf9a08dSKonstantin Belousov { 8736cf9a08dSKonstantin Belousov struct pcb *pcb; 874333d0c60SKonstantin Belousov uint64_t *xstate_bv, bit; 875333d0c60SKonstantin Belousov char *sa; 87614f52559SKonstantin Belousov int max_ext_n, i, owned; 8776cf9a08dSKonstantin Belousov 8786cf9a08dSKonstantin Belousov pcb = td->td_pcb; 87941bed185SKonstantin Belousov critical_enter(); 8806cf9a08dSKonstantin Belousov if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { 8818c6f8f3dSKonstantin Belousov bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb), 8828c6f8f3dSKonstantin Belousov cpu_max_ext_state_size); 8838c6f8f3dSKonstantin Belousov get_pcb_user_save_pcb(pcb)->sv_env.en_cw = 8848c6f8f3dSKonstantin Belousov pcb->pcb_initial_fpucw; 8855c6eb037SKonstantin Belousov fpuuserinited(td); 88641bed185SKonstantin Belousov critical_exit(); 8875c6eb037SKonstantin Belousov return (_MC_FPOWNED_PCB); 8886cf9a08dSKonstantin Belousov } 8896cf9a08dSKonstantin Belousov if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { 8908c6f8f3dSKonstantin Belousov fpusave(get_pcb_user_save_pcb(pcb)); 89114f52559SKonstantin Belousov owned = _MC_FPOWNED_FPU; 8926cf9a08dSKonstantin Belousov } else { 89314f52559SKonstantin Belousov owned = _MC_FPOWNED_PCB; 89414f52559SKonstantin Belousov } 89514f52559SKonstantin Belousov if (use_xsave) { 896333d0c60SKonstantin Belousov /* 897333d0c60SKonstantin Belousov * Handle partially saved state. 
898333d0c60SKonstantin Belousov */ 899333d0c60SKonstantin Belousov sa = (char *)get_pcb_user_save_pcb(pcb); 900333d0c60SKonstantin Belousov xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) + 901333d0c60SKonstantin Belousov offsetof(struct xstate_hdr, xstate_bv)); 902333d0c60SKonstantin Belousov max_ext_n = flsl(xsave_mask); 903333d0c60SKonstantin Belousov for (i = 0; i < max_ext_n; i++) { 904241b67bbSKonstantin Belousov bit = 1ULL << i; 905241b67bbSKonstantin Belousov if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) 906333d0c60SKonstantin Belousov continue; 907333d0c60SKonstantin Belousov bcopy((char *)fpu_initialstate + 908333d0c60SKonstantin Belousov xsave_area_desc[i].offset, 909333d0c60SKonstantin Belousov sa + xsave_area_desc[i].offset, 910333d0c60SKonstantin Belousov xsave_area_desc[i].size); 911333d0c60SKonstantin Belousov *xstate_bv |= bit; 912333d0c60SKonstantin Belousov } 913333d0c60SKonstantin Belousov } 91441bed185SKonstantin Belousov critical_exit(); 91514f52559SKonstantin Belousov return (owned); 9166cf9a08dSKonstantin Belousov } 9176cf9a08dSKonstantin Belousov 9185c6eb037SKonstantin Belousov void 9195c6eb037SKonstantin Belousov fpuuserinited(struct thread *td) 92030abe507SJonathan Mini { 9216cf9a08dSKonstantin Belousov struct pcb *pcb; 92230abe507SJonathan Mini 92341bed185SKonstantin Belousov CRITICAL_ASSERT(td); 9246cf9a08dSKonstantin Belousov pcb = td->td_pcb; 9255c6eb037SKonstantin Belousov if (PCB_USER_FPU(pcb)) 926e6c006d9SJung-uk Kim set_pcb_flags(pcb, 927e6c006d9SJung-uk Kim PCB_FPUINITDONE | PCB_USERFPUINITDONE); 928e6c006d9SJung-uk Kim else 929e6c006d9SJung-uk Kim set_pcb_flags(pcb, PCB_FPUINITDONE); 93030abe507SJonathan Mini } 93130abe507SJonathan Mini 9328c6f8f3dSKonstantin Belousov int 9338c6f8f3dSKonstantin Belousov fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) 9348c6f8f3dSKonstantin Belousov { 9358c6f8f3dSKonstantin Belousov struct xstate_hdr *hdr, *ehdr; 9368c6f8f3dSKonstantin Belousov size_t len, 
max_len; 9378c6f8f3dSKonstantin Belousov uint64_t bv; 9388c6f8f3dSKonstantin Belousov 9398c6f8f3dSKonstantin Belousov /* XXXKIB should we clear all extended state in xstate_bv instead ? */ 9408c6f8f3dSKonstantin Belousov if (xfpustate == NULL) 9418c6f8f3dSKonstantin Belousov return (0); 9428c6f8f3dSKonstantin Belousov if (!use_xsave) 9438c6f8f3dSKonstantin Belousov return (EOPNOTSUPP); 9448c6f8f3dSKonstantin Belousov 9458c6f8f3dSKonstantin Belousov len = xfpustate_size; 9468c6f8f3dSKonstantin Belousov if (len < sizeof(struct xstate_hdr)) 9478c6f8f3dSKonstantin Belousov return (EINVAL); 9488c6f8f3dSKonstantin Belousov max_len = cpu_max_ext_state_size - sizeof(struct savefpu); 9498c6f8f3dSKonstantin Belousov if (len > max_len) 9508c6f8f3dSKonstantin Belousov return (EINVAL); 9518c6f8f3dSKonstantin Belousov 9528c6f8f3dSKonstantin Belousov ehdr = (struct xstate_hdr *)xfpustate; 9538c6f8f3dSKonstantin Belousov bv = ehdr->xstate_bv; 9548c6f8f3dSKonstantin Belousov 9558c6f8f3dSKonstantin Belousov /* 9568c6f8f3dSKonstantin Belousov * Avoid #gp. 9578c6f8f3dSKonstantin Belousov */ 9588c6f8f3dSKonstantin Belousov if (bv & ~xsave_mask) 9598c6f8f3dSKonstantin Belousov return (EINVAL); 9608c6f8f3dSKonstantin Belousov 9618c6f8f3dSKonstantin Belousov hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); 9628c6f8f3dSKonstantin Belousov 9638c6f8f3dSKonstantin Belousov hdr->xstate_bv = bv; 9648c6f8f3dSKonstantin Belousov bcopy(xfpustate + sizeof(struct xstate_hdr), 9658c6f8f3dSKonstantin Belousov (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); 9668c6f8f3dSKonstantin Belousov 9678c6f8f3dSKonstantin Belousov return (0); 9688c6f8f3dSKonstantin Belousov } 9698c6f8f3dSKonstantin Belousov 97030abe507SJonathan Mini /* 97130abe507SJonathan Mini * Set the state of the FPU. 
97230abe507SJonathan Mini */ 9738c6f8f3dSKonstantin Belousov int 9748c6f8f3dSKonstantin Belousov fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, 9758c6f8f3dSKonstantin Belousov size_t xfpustate_size) 9766cf9a08dSKonstantin Belousov { 9776cf9a08dSKonstantin Belousov struct pcb *pcb; 9788c6f8f3dSKonstantin Belousov int error; 9796cf9a08dSKonstantin Belousov 980aa788cc3SKonstantin Belousov addr->sv_env.en_mxcsr &= cpu_mxcsr_mask; 9816cf9a08dSKonstantin Belousov pcb = td->td_pcb; 98241bed185SKonstantin Belousov error = 0; 98399753495SKonstantin Belousov critical_enter(); 9846cf9a08dSKonstantin Belousov if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { 9858c6f8f3dSKonstantin Belousov error = fpusetxstate(td, xfpustate, xfpustate_size); 98641bed185SKonstantin Belousov if (error == 0) { 9878c6f8f3dSKonstantin Belousov bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); 9888c6f8f3dSKonstantin Belousov fpurestore(get_pcb_user_save_td(td)); 98941bed185SKonstantin Belousov set_pcb_flags(pcb, PCB_FPUINITDONE | 99041bed185SKonstantin Belousov PCB_USERFPUINITDONE); 99141bed185SKonstantin Belousov } 9926cf9a08dSKonstantin Belousov } else { 9938c6f8f3dSKonstantin Belousov error = fpusetxstate(td, xfpustate, xfpustate_size); 99441bed185SKonstantin Belousov if (error == 0) { 9958c6f8f3dSKonstantin Belousov bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); 9965c6eb037SKonstantin Belousov fpuuserinited(td); 9976cf9a08dSKonstantin Belousov } 99841bed185SKonstantin Belousov } 99941bed185SKonstantin Belousov critical_exit(); 100041bed185SKonstantin Belousov return (error); 10016cf9a08dSKonstantin Belousov } 10026cf9a08dSKonstantin Belousov 10036182fdbdSPeter Wemm /* 10042652af56SColin Percival * On AuthenticAMD processors, the fxrstor instruction does not restore 10052652af56SColin Percival * the x87's stored last instruction pointer, last data pointer, and last 10062652af56SColin Percival * opcode values, except in the rare case in which the exception 
summary 10072652af56SColin Percival * (ES) bit in the x87 status word is set to 1. 10082652af56SColin Percival * 10092652af56SColin Percival * In order to avoid leaking this information across processes, we clean 10102652af56SColin Percival * these values by performing a dummy load before executing fxrstor(). 10112652af56SColin Percival */ 10122652af56SColin Percival static void 10132652af56SColin Percival fpu_clean_state(void) 10142652af56SColin Percival { 1015b9dda9d6SJohn Baldwin static float dummy_variable = 0.0; 10162652af56SColin Percival u_short status; 10172652af56SColin Percival 10182652af56SColin Percival /* 10192652af56SColin Percival * Clear the ES bit in the x87 status word if it is currently 10202652af56SColin Percival * set, in order to avoid causing a fault in the upcoming load. 10212652af56SColin Percival */ 10222652af56SColin Percival fnstsw(&status); 10232652af56SColin Percival if (status & 0x80) 10242652af56SColin Percival fnclex(); 10252652af56SColin Percival 10262652af56SColin Percival /* 10272652af56SColin Percival * Load the dummy variable into the x87 stack. This mangles 10282652af56SColin Percival * the x87 stack, but we don't care since we're about to call 10292652af56SColin Percival * fxrstor() anyway. 10302652af56SColin Percival */ 103114965052SDimitry Andric __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); 10322652af56SColin Percival } 10332652af56SColin Percival 10342652af56SColin Percival /* 1035398dbb11SPeter Wemm * This really sucks. We want the acpi version only, but it requires 1036398dbb11SPeter Wemm * the isa_if.h file in order to get the definitions. 10376182fdbdSPeter Wemm */ 1038398dbb11SPeter Wemm #include "opt_isa.h" 1039afa88623SPeter Wemm #ifdef DEV_ISA 1040398dbb11SPeter Wemm #include <isa/isavar.h> 104154f1d0ceSGarrett Wollman /* 10425f063c7bSMike Smith * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. 
104354f1d0ceSGarrett Wollman */ 1044398dbb11SPeter Wemm static struct isa_pnp_id fpupnp_ids[] = { 104554f1d0ceSGarrett Wollman { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ 104654f1d0ceSGarrett Wollman { 0 } 104754f1d0ceSGarrett Wollman }; 104854f1d0ceSGarrett Wollman 104954f1d0ceSGarrett Wollman static int 1050398dbb11SPeter Wemm fpupnp_probe(device_t dev) 105154f1d0ceSGarrett Wollman { 1052bb9c06c1SMike Smith int result; 1053bf2f09eeSPeter Wemm 1054398dbb11SPeter Wemm result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids); 1055bf2f09eeSPeter Wemm if (result <= 0) 1056bb9c06c1SMike Smith device_quiet(dev); 1057bb9c06c1SMike Smith return (result); 105854f1d0ceSGarrett Wollman } 105954f1d0ceSGarrett Wollman 106054f1d0ceSGarrett Wollman static int 1061398dbb11SPeter Wemm fpupnp_attach(device_t dev) 106254f1d0ceSGarrett Wollman { 1063bf2f09eeSPeter Wemm 106454f1d0ceSGarrett Wollman return (0); 106554f1d0ceSGarrett Wollman } 106654f1d0ceSGarrett Wollman 1067398dbb11SPeter Wemm static device_method_t fpupnp_methods[] = { 106854f1d0ceSGarrett Wollman /* Device interface */ 1069398dbb11SPeter Wemm DEVMETHOD(device_probe, fpupnp_probe), 1070398dbb11SPeter Wemm DEVMETHOD(device_attach, fpupnp_attach), 107154f1d0ceSGarrett Wollman DEVMETHOD(device_detach, bus_generic_detach), 107254f1d0ceSGarrett Wollman DEVMETHOD(device_shutdown, bus_generic_shutdown), 107354f1d0ceSGarrett Wollman DEVMETHOD(device_suspend, bus_generic_suspend), 107454f1d0ceSGarrett Wollman DEVMETHOD(device_resume, bus_generic_resume), 107554f1d0ceSGarrett Wollman { 0, 0 } 107654f1d0ceSGarrett Wollman }; 107754f1d0ceSGarrett Wollman 1078398dbb11SPeter Wemm static driver_t fpupnp_driver = { 1079398dbb11SPeter Wemm "fpupnp", 1080398dbb11SPeter Wemm fpupnp_methods, 108154f1d0ceSGarrett Wollman 1, /* no softc */ 108254f1d0ceSGarrett Wollman }; 108354f1d0ceSGarrett Wollman 1084398dbb11SPeter Wemm static devclass_t fpupnp_devclass; 108554f1d0ceSGarrett Wollman 1086398dbb11SPeter Wemm 
DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); 1087d6b66397SWarner Losh ISA_PNP_INFO(fpupnp_ids); 1088586079ccSBruce Evans #endif /* DEV_ISA */ 10896cf9a08dSKonstantin Belousov 10908c6f8f3dSKonstantin Belousov static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", 10918c6f8f3dSKonstantin Belousov "Kernel contexts for FPU state"); 10928c6f8f3dSKonstantin Belousov 10938c6f8f3dSKonstantin Belousov #define FPU_KERN_CTX_FPUINITDONE 0x01 1094633034feSKonstantin Belousov #define FPU_KERN_CTX_DUMMY 0x02 /* avoided save for the kern thread */ 1095e808e13bSJohn-Mark Gurney #define FPU_KERN_CTX_INUSE 0x04 10968c6f8f3dSKonstantin Belousov 10978c6f8f3dSKonstantin Belousov struct fpu_kern_ctx { 10988c6f8f3dSKonstantin Belousov struct savefpu *prev; 10998c6f8f3dSKonstantin Belousov uint32_t flags; 11008c6f8f3dSKonstantin Belousov char hwstate1[]; 11018c6f8f3dSKonstantin Belousov }; 11028c6f8f3dSKonstantin Belousov 1103c74a3041SConrad Meyer static inline size_t __pure2 1104c74a3041SConrad Meyer fpu_kern_alloc_sz(u_int max_est) 1105c74a3041SConrad Meyer { 1106c74a3041SConrad Meyer return (sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + max_est); 1107c74a3041SConrad Meyer } 1108c74a3041SConrad Meyer 1109c74a3041SConrad Meyer static inline int __pure2 1110c74a3041SConrad Meyer fpu_kern_malloc_flags(u_int fpflags) 1111c74a3041SConrad Meyer { 1112c74a3041SConrad Meyer return (((fpflags & FPU_KERN_NOWAIT) ? 
M_NOWAIT : M_WAITOK) | M_ZERO); 1113c74a3041SConrad Meyer } 1114c74a3041SConrad Meyer 1115c74a3041SConrad Meyer struct fpu_kern_ctx * 1116c74a3041SConrad Meyer fpu_kern_alloc_ctx_domain(int domain, u_int flags) 1117c74a3041SConrad Meyer { 1118c74a3041SConrad Meyer return (malloc_domainset(fpu_kern_alloc_sz(cpu_max_ext_state_size), 1119c74a3041SConrad Meyer M_FPUKERN_CTX, DOMAINSET_PREF(domain), 1120c74a3041SConrad Meyer fpu_kern_malloc_flags(flags))); 1121c74a3041SConrad Meyer } 1122c74a3041SConrad Meyer 11238c6f8f3dSKonstantin Belousov struct fpu_kern_ctx * 11248c6f8f3dSKonstantin Belousov fpu_kern_alloc_ctx(u_int flags) 11258c6f8f3dSKonstantin Belousov { 1126c74a3041SConrad Meyer return (malloc(fpu_kern_alloc_sz(cpu_max_ext_state_size), 1127c74a3041SConrad Meyer M_FPUKERN_CTX, fpu_kern_malloc_flags(flags))); 11288c6f8f3dSKonstantin Belousov } 11298c6f8f3dSKonstantin Belousov 11308c6f8f3dSKonstantin Belousov void 11318c6f8f3dSKonstantin Belousov fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) 11328c6f8f3dSKonstantin Belousov { 11338c6f8f3dSKonstantin Belousov 1134e808e13bSJohn-Mark Gurney KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); 11358c6f8f3dSKonstantin Belousov /* XXXKIB clear the memory ? 
*/ 11368c6f8f3dSKonstantin Belousov free(ctx, M_FPUKERN_CTX); 11378c6f8f3dSKonstantin Belousov } 11388c6f8f3dSKonstantin Belousov 11398c6f8f3dSKonstantin Belousov static struct savefpu * 11408c6f8f3dSKonstantin Belousov fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) 11418c6f8f3dSKonstantin Belousov { 11428c6f8f3dSKonstantin Belousov vm_offset_t p; 11438c6f8f3dSKonstantin Belousov 11448c6f8f3dSKonstantin Belousov p = (vm_offset_t)&ctx->hwstate1; 11458c6f8f3dSKonstantin Belousov p = roundup2(p, XSAVE_AREA_ALIGN); 11468c6f8f3dSKonstantin Belousov return ((struct savefpu *)p); 11478c6f8f3dSKonstantin Belousov } 11488c6f8f3dSKonstantin Belousov 1149849ce31aSConrad Meyer void 11506cf9a08dSKonstantin Belousov fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) 11516cf9a08dSKonstantin Belousov { 11526cf9a08dSKonstantin Belousov struct pcb *pcb; 11536cf9a08dSKonstantin Belousov 1154cf1c4776SKonstantin Belousov pcb = td->td_pcb; 1155cf1c4776SKonstantin Belousov KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL, 1156cf1c4776SKonstantin Belousov ("ctx is required when !FPU_KERN_NOCTX")); 1157cf1c4776SKonstantin Belousov KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0, 1158cf1c4776SKonstantin Belousov ("using inuse ctx")); 1159cf1c4776SKonstantin Belousov KASSERT((pcb->pcb_flags & PCB_FPUNOSAVE) == 0, 1160cf1c4776SKonstantin Belousov ("recursive fpu_kern_enter while in PCB_FPUNOSAVE state")); 1161e808e13bSJohn-Mark Gurney 1162cf1c4776SKonstantin Belousov if ((flags & FPU_KERN_NOCTX) != 0) { 1163cf1c4776SKonstantin Belousov critical_enter(); 1164cf1c4776SKonstantin Belousov stop_emulating(); 1165cf1c4776SKonstantin Belousov if (curthread == PCPU_GET(fpcurthread)) { 1166cf1c4776SKonstantin Belousov fpusave(curpcb->pcb_save); 1167cf1c4776SKonstantin Belousov PCPU_SET(fpcurthread, NULL); 1168cf1c4776SKonstantin Belousov } else { 1169cf1c4776SKonstantin Belousov KASSERT(PCPU_GET(fpcurthread) == NULL, 1170cf1c4776SKonstantin Belousov ("invalid 
fpcurthread")); 1171cf1c4776SKonstantin Belousov } 1172cf1c4776SKonstantin Belousov 1173cf1c4776SKonstantin Belousov /* 1174cf1c4776SKonstantin Belousov * This breaks XSAVEOPT tracker, but 1175cf1c4776SKonstantin Belousov * PCB_FPUNOSAVE state is supposed to never need to 1176cf1c4776SKonstantin Belousov * save FPU context at all. 1177cf1c4776SKonstantin Belousov */ 1178cf1c4776SKonstantin Belousov fpurestore(fpu_initialstate); 1179cf1c4776SKonstantin Belousov set_pcb_flags(pcb, PCB_KERNFPU | PCB_FPUNOSAVE | 1180cf1c4776SKonstantin Belousov PCB_FPUINITDONE); 1181849ce31aSConrad Meyer return; 1182cf1c4776SKonstantin Belousov } 1183633034feSKonstantin Belousov if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { 1184e808e13bSJohn-Mark Gurney ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; 1185849ce31aSConrad Meyer return; 1186633034feSKonstantin Belousov } 118741bed185SKonstantin Belousov critical_enter(); 11888c6f8f3dSKonstantin Belousov KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == 11898c6f8f3dSKonstantin Belousov get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); 1190e808e13bSJohn-Mark Gurney ctx->flags = FPU_KERN_CTX_INUSE; 11916cf9a08dSKonstantin Belousov if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) 11926cf9a08dSKonstantin Belousov ctx->flags |= FPU_KERN_CTX_FPUINITDONE; 11936cf9a08dSKonstantin Belousov fpuexit(td); 11946cf9a08dSKonstantin Belousov ctx->prev = pcb->pcb_save; 11958c6f8f3dSKonstantin Belousov pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); 1196e6c006d9SJung-uk Kim set_pcb_flags(pcb, PCB_KERNFPU); 1197e6c006d9SJung-uk Kim clear_pcb_flags(pcb, PCB_FPUINITDONE); 119841bed185SKonstantin Belousov critical_exit(); 11996cf9a08dSKonstantin Belousov } 12006cf9a08dSKonstantin Belousov 12016cf9a08dSKonstantin Belousov int 12026cf9a08dSKonstantin Belousov fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) 12036cf9a08dSKonstantin Belousov { 12046cf9a08dSKonstantin Belousov struct pcb *pcb; 12056cf9a08dSKonstantin Belousov 
1206cf1c4776SKonstantin Belousov pcb = td->td_pcb; 1207cf1c4776SKonstantin Belousov 1208cf1c4776SKonstantin Belousov if ((pcb->pcb_flags & PCB_FPUNOSAVE) != 0) { 1209cf1c4776SKonstantin Belousov KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX")); 1210cf1c4776SKonstantin Belousov KASSERT(PCPU_GET(fpcurthread) == NULL, 1211cf1c4776SKonstantin Belousov ("non-NULL fpcurthread for PCB_FPUNOSAVE")); 1212cf1c4776SKonstantin Belousov CRITICAL_ASSERT(td); 1213cf1c4776SKonstantin Belousov 1214cf1c4776SKonstantin Belousov clear_pcb_flags(pcb, PCB_FPUNOSAVE | PCB_FPUINITDONE); 1215cf1c4776SKonstantin Belousov start_emulating(); 1216cf1c4776SKonstantin Belousov } else { 1217e808e13bSJohn-Mark Gurney KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, 1218e808e13bSJohn-Mark Gurney ("leaving not inuse ctx")); 1219e808e13bSJohn-Mark Gurney ctx->flags &= ~FPU_KERN_CTX_INUSE; 1220e808e13bSJohn-Mark Gurney 1221cf1c4776SKonstantin Belousov if (is_fpu_kern_thread(0) && 1222cf1c4776SKonstantin Belousov (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) 1223633034feSKonstantin Belousov return (0); 1224cf1c4776SKonstantin Belousov KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, 1225cf1c4776SKonstantin Belousov ("dummy ctx")); 122699753495SKonstantin Belousov critical_enter(); 12276cf9a08dSKonstantin Belousov if (curthread == PCPU_GET(fpcurthread)) 12286cf9a08dSKonstantin Belousov fpudrop(); 12296cf9a08dSKonstantin Belousov pcb->pcb_save = ctx->prev; 1230cf1c4776SKonstantin Belousov } 1231cf1c4776SKonstantin Belousov 12328c6f8f3dSKonstantin Belousov if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { 1233e6c006d9SJung-uk Kim if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) { 1234e6c006d9SJung-uk Kim set_pcb_flags(pcb, PCB_FPUINITDONE); 1235e4062350SKonstantin Belousov if ((pcb->pcb_flags & PCB_KERNFPU_THR) == 0) 1236e6c006d9SJung-uk Kim clear_pcb_flags(pcb, PCB_KERNFPU); 1237e4062350SKonstantin Belousov } else if ((pcb->pcb_flags & PCB_KERNFPU_THR) == 0) 1238e6c006d9SJung-uk Kim 
clear_pcb_flags(pcb, PCB_FPUINITDONE | PCB_KERNFPU); 12396cf9a08dSKonstantin Belousov } else { 12406cf9a08dSKonstantin Belousov if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0) 1241e6c006d9SJung-uk Kim set_pcb_flags(pcb, PCB_FPUINITDONE); 12426cf9a08dSKonstantin Belousov else 1243e6c006d9SJung-uk Kim clear_pcb_flags(pcb, PCB_FPUINITDONE); 12446cf9a08dSKonstantin Belousov KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); 12456cf9a08dSKonstantin Belousov } 124641bed185SKonstantin Belousov critical_exit(); 12476cf9a08dSKonstantin Belousov return (0); 12486cf9a08dSKonstantin Belousov } 12496cf9a08dSKonstantin Belousov 12506cf9a08dSKonstantin Belousov int 12516cf9a08dSKonstantin Belousov fpu_kern_thread(u_int flags) 12526cf9a08dSKonstantin Belousov { 12536cf9a08dSKonstantin Belousov 12546cf9a08dSKonstantin Belousov KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, 12556cf9a08dSKonstantin Belousov ("Only kthread may use fpu_kern_thread")); 12561965c139SKonstantin Belousov KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), 12578c6f8f3dSKonstantin Belousov ("mangled pcb_save")); 12581965c139SKonstantin Belousov KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); 12596cf9a08dSKonstantin Belousov 1260e4062350SKonstantin Belousov set_pcb_flags(curpcb, PCB_KERNFPU | PCB_KERNFPU_THR); 12616cf9a08dSKonstantin Belousov return (0); 12626cf9a08dSKonstantin Belousov } 12636cf9a08dSKonstantin Belousov 12646cf9a08dSKonstantin Belousov int 12656cf9a08dSKonstantin Belousov is_fpu_kern_thread(u_int flags) 12666cf9a08dSKonstantin Belousov { 12676cf9a08dSKonstantin Belousov 12686cf9a08dSKonstantin Belousov if ((curthread->td_pflags & TDP_KTHREAD) == 0) 12696cf9a08dSKonstantin Belousov return (0); 1270e4062350SKonstantin Belousov return ((curpcb->pcb_flags & PCB_KERNFPU_THR) != 0); 12716cf9a08dSKonstantin Belousov } 12722741efecSPeter Grehan 12732741efecSPeter Grehan /* 12742741efecSPeter Grehan * FPU save area alloc/free/init utility routines 12752741efecSPeter Grehan */ 
12762741efecSPeter Grehan struct savefpu * 12772741efecSPeter Grehan fpu_save_area_alloc(void) 12782741efecSPeter Grehan { 12792741efecSPeter Grehan 1280854e90daSEric van Gyzen return (uma_zalloc(fpu_save_area_zone, M_WAITOK)); 12812741efecSPeter Grehan } 12822741efecSPeter Grehan 12832741efecSPeter Grehan void 12842741efecSPeter Grehan fpu_save_area_free(struct savefpu *fsa) 12852741efecSPeter Grehan { 12862741efecSPeter Grehan 12872741efecSPeter Grehan uma_zfree(fpu_save_area_zone, fsa); 12882741efecSPeter Grehan } 12892741efecSPeter Grehan 12902741efecSPeter Grehan void 12912741efecSPeter Grehan fpu_save_area_reset(struct savefpu *fsa) 12922741efecSPeter Grehan { 12932741efecSPeter Grehan 12942741efecSPeter Grehan bcopy(fpu_initialstate, fsa, cpu_max_ext_state_size); 12952741efecSPeter Grehan } 1296