15b81b6b3SRodney W. Grimes /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 45b81b6b3SRodney W. Grimes * Copyright (c) 1990 William Jolitz. 55b81b6b3SRodney W. Grimes * Copyright (c) 1991 The Regents of the University of California. 65b81b6b3SRodney W. Grimes * All rights reserved. 75b81b6b3SRodney W. Grimes * 85b81b6b3SRodney W. Grimes * Redistribution and use in source and binary forms, with or without 95b81b6b3SRodney W. Grimes * modification, are permitted provided that the following conditions 105b81b6b3SRodney W. Grimes * are met: 115b81b6b3SRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 125b81b6b3SRodney W. Grimes * notice, this list of conditions and the following disclaimer. 135b81b6b3SRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 145b81b6b3SRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 155b81b6b3SRodney W. Grimes * documentation and/or other materials provided with the distribution. 16fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 175b81b6b3SRodney W. Grimes * may be used to endorse or promote products derived from this software 185b81b6b3SRodney W. Grimes * without specific prior written permission. 195b81b6b3SRodney W. Grimes * 205b81b6b3SRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 215b81b6b3SRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 225b81b6b3SRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 235b81b6b3SRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 245b81b6b3SRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 255b81b6b3SRodney W. 
Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 265b81b6b3SRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 275b81b6b3SRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 285b81b6b3SRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 295b81b6b3SRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 305b81b6b3SRodney W. Grimes * SUCH DAMAGE. 315b81b6b3SRodney W. Grimes * 3221616ec3SPeter Wemm * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 335b81b6b3SRodney W. Grimes */ 345b81b6b3SRodney W. Grimes 3556ae44c5SDavid E. O'Brien #include <sys/cdefs.h> 3656ae44c5SDavid E. O'Brien __FBSDID("$FreeBSD$"); 3756ae44c5SDavid E. O'Brien 38f540b106SGarrett Wollman #include <sys/param.h> 39f540b106SGarrett Wollman #include <sys/systm.h> 406182fdbdSPeter Wemm #include <sys/bus.h> 413a34a5c3SPoul-Henning Kamp #include <sys/kernel.h> 42fb919e4dSMark Murray #include <sys/lock.h> 43cd59d49dSBruce Evans #include <sys/malloc.h> 446182fdbdSPeter Wemm #include <sys/module.h> 45c1ef8aacSJake Burkholder #include <sys/mutex.h> 46fb919e4dSMark Murray #include <sys/mutex.h> 47fb919e4dSMark Murray #include <sys/proc.h> 48fb919e4dSMark Murray #include <sys/sysctl.h> 496182fdbdSPeter Wemm #include <machine/bus.h> 506182fdbdSPeter Wemm #include <sys/rman.h> 51663f1485SBruce Evans #include <sys/signalvar.h> 522741efecSPeter Grehan #include <vm/uma.h> 532f86936aSGarrett Wollman 547f47cf2fSBruce Evans #include <machine/cputypes.h> 557f47cf2fSBruce Evans #include <machine/frame.h> 560d2a2989SPeter Wemm #include <machine/intr_machdep.h> 57c673fe98SBruce Evans #include <machine/md_var.h> 585400ed3bSPeter Wemm #include <machine/pcb.h> 597f47cf2fSBruce Evans #include <machine/psl.h> 606182fdbdSPeter Wemm #include <machine/resource.h> 61f540b106SGarrett Wollman #include <machine/specialreg.h> 627f47cf2fSBruce Evans #include 
<machine/segments.h> 6330abe507SJonathan Mini #include <machine/ucontext.h> 648b4fc8b1SKonstantin Belousov #include <x86/ifunc.h> 652f86936aSGarrett Wollman 665b81b6b3SRodney W. Grimes /* 67bf2f09eeSPeter Wemm * Floating point support. 685b81b6b3SRodney W. Grimes */ 695b81b6b3SRodney W. Grimes 70a5f50ef9SJoerg Wunsch #if defined(__GNUCLIKE_ASM) && !defined(lint) 715b81b6b3SRodney W. Grimes 7217275403SJung-uk Kim #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) 7330402401SJung-uk Kim #define fnclex() __asm __volatile("fnclex") 7430402401SJung-uk Kim #define fninit() __asm __volatile("fninit") 751d37f051SBruce Evans #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) 762e50fa36SJung-uk Kim #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) 7730402401SJung-uk Kim #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) 789d146ac5SPeter Wemm #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) 7907c86dcfSJung-uk Kim #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) 80a81f9fedSKonstantin Belousov #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) 815b81b6b3SRodney W. 
Grimes 8294818d19SKonstantin Belousov static __inline void 8394818d19SKonstantin Belousov xrstor(char *addr, uint64_t mask) 8494818d19SKonstantin Belousov { 8594818d19SKonstantin Belousov uint32_t low, hi; 8694818d19SKonstantin Belousov 8794818d19SKonstantin Belousov low = mask; 8894818d19SKonstantin Belousov hi = mask >> 32; 897574a595SJohn Baldwin __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); 9094818d19SKonstantin Belousov } 9194818d19SKonstantin Belousov 9294818d19SKonstantin Belousov static __inline void 9394818d19SKonstantin Belousov xsave(char *addr, uint64_t mask) 9494818d19SKonstantin Belousov { 9594818d19SKonstantin Belousov uint32_t low, hi; 9694818d19SKonstantin Belousov 9794818d19SKonstantin Belousov low = mask; 9894818d19SKonstantin Belousov hi = mask >> 32; 997574a595SJohn Baldwin __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : 1007574a595SJohn Baldwin "memory"); 10194818d19SKonstantin Belousov } 10294818d19SKonstantin Belousov 1038207def1SConrad Meyer static __inline void 1048207def1SConrad Meyer xsaveopt(char *addr, uint64_t mask) 1058207def1SConrad Meyer { 1068207def1SConrad Meyer uint32_t low, hi; 1078207def1SConrad Meyer 1088207def1SConrad Meyer low = mask; 1098207def1SConrad Meyer hi = mask >> 32; 1108207def1SConrad Meyer __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) : 1118207def1SConrad Meyer "memory"); 1128207def1SConrad Meyer } 1138207def1SConrad Meyer 114cf4e1c46SPeter Wemm #else /* !(__GNUCLIKE_ASM && !lint) */ 1155b81b6b3SRodney W. 
Grimes 11617275403SJung-uk Kim void fldcw(u_short cw); 11789c9a483SAlfred Perlstein void fnclex(void); 11889c9a483SAlfred Perlstein void fninit(void); 11989c9a483SAlfred Perlstein void fnstcw(caddr_t addr); 12089c9a483SAlfred Perlstein void fnstsw(caddr_t addr); 12189c9a483SAlfred Perlstein void fxsave(caddr_t addr); 12289c9a483SAlfred Perlstein void fxrstor(caddr_t addr); 12307c86dcfSJung-uk Kim void ldmxcsr(u_int csr); 124a42fa0afSKonstantin Belousov void stmxcsr(u_int *csr); 12594818d19SKonstantin Belousov void xrstor(char *addr, uint64_t mask); 12694818d19SKonstantin Belousov void xsave(char *addr, uint64_t mask); 1278207def1SConrad Meyer void xsaveopt(char *addr, uint64_t mask); 1285b81b6b3SRodney W. Grimes 129cf4e1c46SPeter Wemm #endif /* __GNUCLIKE_ASM && !lint */ 1305b81b6b3SRodney W. Grimes 131d706ec29SJohn Baldwin #define start_emulating() load_cr0(rcr0() | CR0_TS) 132d706ec29SJohn Baldwin #define stop_emulating() clts() 133d706ec29SJohn Baldwin 1348c6f8f3dSKonstantin Belousov CTASSERT(sizeof(struct savefpu) == 512); 1358c6f8f3dSKonstantin Belousov CTASSERT(sizeof(struct xstate_hdr) == 64); 1368c6f8f3dSKonstantin Belousov CTASSERT(sizeof(struct savefpu_ymm) == 832); 1378c6f8f3dSKonstantin Belousov 1388c6f8f3dSKonstantin Belousov /* 1398c6f8f3dSKonstantin Belousov * This requirement is to make it easier for asm code to calculate 1408c6f8f3dSKonstantin Belousov * offset of the fpu save area from the pcb address. FPU save area 141b74a2290SKonstantin Belousov * must be 64-byte aligned. 1428c6f8f3dSKonstantin Belousov */ 1438c6f8f3dSKonstantin Belousov CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); 1445b81b6b3SRodney W. Grimes 145180e57e5SJohn Baldwin /* 146180e57e5SJohn Baldwin * Ensure the copy of XCR0 saved in a core is contained in the padding 147180e57e5SJohn Baldwin * area. 
148180e57e5SJohn Baldwin */ 149180e57e5SJohn Baldwin CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savefpu, sv_pad) && 150180e57e5SJohn Baldwin X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savefpu)); 151180e57e5SJohn Baldwin 1522652af56SColin Percival static void fpu_clean_state(void); 1532652af56SColin Percival 1540b7dc0a7SJohn Baldwin SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, 155f0188618SHans Petter Selasky SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware"); 1563a34a5c3SPoul-Henning Kamp 157d1a07e31SKonstantin Belousov int lazy_fpu_switch = 0; 158d1a07e31SKonstantin Belousov SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, 159d1a07e31SKonstantin Belousov &lazy_fpu_switch, 0, 160d1a07e31SKonstantin Belousov "Lazily load FPU context after context switch"); 161d1a07e31SKonstantin Belousov 1628c6f8f3dSKonstantin Belousov int use_xsave; /* non-static for cpu_switch.S */ 1638c6f8f3dSKonstantin Belousov uint64_t xsave_mask; /* the same */ 1642741efecSPeter Grehan static uma_zone_t fpu_save_area_zone; 1658c6f8f3dSKonstantin Belousov static struct savefpu *fpu_initialstate; 1668c6f8f3dSKonstantin Belousov 167333d0c60SKonstantin Belousov struct xsave_area_elm_descr { 168333d0c60SKonstantin Belousov u_int offset; 169333d0c60SKonstantin Belousov u_int size; 170333d0c60SKonstantin Belousov } *xsave_area_desc; 171333d0c60SKonstantin Belousov 1728b4fc8b1SKonstantin Belousov static void 1738207def1SConrad Meyer fpusave_xsaveopt(void *addr) 1748207def1SConrad Meyer { 1758207def1SConrad Meyer 1768207def1SConrad Meyer xsaveopt((char *)addr, xsave_mask); 1778207def1SConrad Meyer } 1788207def1SConrad Meyer 1798207def1SConrad Meyer static void 1808b4fc8b1SKonstantin Belousov fpusave_xsave(void *addr) 1818c6f8f3dSKonstantin Belousov { 1828c6f8f3dSKonstantin Belousov 1838c6f8f3dSKonstantin Belousov xsave((char *)addr, xsave_mask); 1848b4fc8b1SKonstantin Belousov } 1858b4fc8b1SKonstantin Belousov 
1868b4fc8b1SKonstantin Belousov static void 1878b4fc8b1SKonstantin Belousov fpurestore_xrstor(void *addr) 1888b4fc8b1SKonstantin Belousov { 1898b4fc8b1SKonstantin Belousov 1908b4fc8b1SKonstantin Belousov xrstor((char *)addr, xsave_mask); 1918b4fc8b1SKonstantin Belousov } 1928b4fc8b1SKonstantin Belousov 1938b4fc8b1SKonstantin Belousov static void 1948b4fc8b1SKonstantin Belousov fpusave_fxsave(void *addr) 1958b4fc8b1SKonstantin Belousov { 1968b4fc8b1SKonstantin Belousov 1978c6f8f3dSKonstantin Belousov fxsave((char *)addr); 1988c6f8f3dSKonstantin Belousov } 1998c6f8f3dSKonstantin Belousov 2008b4fc8b1SKonstantin Belousov static void 2018b4fc8b1SKonstantin Belousov fpurestore_fxrstor(void *addr) 2028b4fc8b1SKonstantin Belousov { 2038b4fc8b1SKonstantin Belousov 2048b4fc8b1SKonstantin Belousov fxrstor((char *)addr); 2058b4fc8b1SKonstantin Belousov } 2068b4fc8b1SKonstantin Belousov 2078b4fc8b1SKonstantin Belousov static void 2088b4fc8b1SKonstantin Belousov init_xsave(void) 2098c6f8f3dSKonstantin Belousov { 2108c6f8f3dSKonstantin Belousov 2118c6f8f3dSKonstantin Belousov if (use_xsave) 2128b4fc8b1SKonstantin Belousov return; 2138b4fc8b1SKonstantin Belousov if ((cpu_feature2 & CPUID2_XSAVE) == 0) 2148b4fc8b1SKonstantin Belousov return; 2158b4fc8b1SKonstantin Belousov use_xsave = 1; 2168b4fc8b1SKonstantin Belousov TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); 2178b4fc8b1SKonstantin Belousov } 2188b4fc8b1SKonstantin Belousov 2198b4fc8b1SKonstantin Belousov DEFINE_IFUNC(, void, fpusave, (void *), static) 2208b4fc8b1SKonstantin Belousov { 2218b4fc8b1SKonstantin Belousov 2228b4fc8b1SKonstantin Belousov init_xsave(); 2238207def1SConrad Meyer if (use_xsave) 2248207def1SConrad Meyer return ((cpu_stdext_feature & CPUID_EXTSTATE_XSAVEOPT) != 0 ? 
2258207def1SConrad Meyer fpusave_xsaveopt : fpusave_xsave); 2268207def1SConrad Meyer return (fpusave_fxsave); 2278b4fc8b1SKonstantin Belousov } 2288b4fc8b1SKonstantin Belousov 2298b4fc8b1SKonstantin Belousov DEFINE_IFUNC(, void, fpurestore, (void *), static) 2308b4fc8b1SKonstantin Belousov { 2318b4fc8b1SKonstantin Belousov 2328b4fc8b1SKonstantin Belousov init_xsave(); 2338b4fc8b1SKonstantin Belousov return (use_xsave ? fpurestore_xrstor : fpurestore_fxrstor); 2348c6f8f3dSKonstantin Belousov } 2353902c3efSSteve Passe 2361d22d877SJung-uk Kim void 2371d22d877SJung-uk Kim fpususpend(void *addr) 2381d22d877SJung-uk Kim { 2391d22d877SJung-uk Kim u_long cr0; 2401d22d877SJung-uk Kim 2411d22d877SJung-uk Kim cr0 = rcr0(); 2421d22d877SJung-uk Kim stop_emulating(); 2431d22d877SJung-uk Kim fpusave(addr); 2441d22d877SJung-uk Kim load_cr0(cr0); 2451d22d877SJung-uk Kim } 2461d22d877SJung-uk Kim 247b1d735baSJohn Baldwin void 248b1d735baSJohn Baldwin fpuresume(void *addr) 249b1d735baSJohn Baldwin { 250b1d735baSJohn Baldwin u_long cr0; 251b1d735baSJohn Baldwin 252b1d735baSJohn Baldwin cr0 = rcr0(); 253b1d735baSJohn Baldwin stop_emulating(); 254b1d735baSJohn Baldwin fninit(); 255b1d735baSJohn Baldwin if (use_xsave) 256b1d735baSJohn Baldwin load_xcr(XCR0, xsave_mask); 257b1d735baSJohn Baldwin fpurestore(addr); 258b1d735baSJohn Baldwin load_cr0(cr0); 259b1d735baSJohn Baldwin } 260b1d735baSJohn Baldwin 2615b81b6b3SRodney W. Grimes /* 2628c6f8f3dSKonstantin Belousov * Enable XSAVE if supported and allowed by user. 2638c6f8f3dSKonstantin Belousov * Calculate the xsave_mask. 
2648c6f8f3dSKonstantin Belousov */ 2658c6f8f3dSKonstantin Belousov static void 2668c6f8f3dSKonstantin Belousov fpuinit_bsp1(void) 2678c6f8f3dSKonstantin Belousov { 2688c6f8f3dSKonstantin Belousov u_int cp[4]; 2698c6f8f3dSKonstantin Belousov uint64_t xsave_mask_user; 2709cffc92cSKonstantin Belousov bool old_wp; 2718c6f8f3dSKonstantin Belousov 272d1a07e31SKonstantin Belousov TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch); 2738c6f8f3dSKonstantin Belousov if (!use_xsave) 2748c6f8f3dSKonstantin Belousov return; 2758c6f8f3dSKonstantin Belousov cpuid_count(0xd, 0x0, cp); 2768c6f8f3dSKonstantin Belousov xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; 2778c6f8f3dSKonstantin Belousov if ((cp[0] & xsave_mask) != xsave_mask) 2788c6f8f3dSKonstantin Belousov panic("CPU0 does not support X87 or SSE: %x", cp[0]); 2798c6f8f3dSKonstantin Belousov xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; 2808c6f8f3dSKonstantin Belousov xsave_mask_user = xsave_mask; 2818c6f8f3dSKonstantin Belousov TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); 2828c6f8f3dSKonstantin Belousov xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; 2838c6f8f3dSKonstantin Belousov xsave_mask &= xsave_mask_user; 2840eb7ae8dSJohn Baldwin if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) 2850eb7ae8dSJohn Baldwin xsave_mask &= ~XFEATURE_AVX512; 2860eb7ae8dSJohn Baldwin if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) 2870eb7ae8dSJohn Baldwin xsave_mask &= ~XFEATURE_MPX; 288333d0c60SKonstantin Belousov 289333d0c60SKonstantin Belousov cpuid_count(0xd, 0x1, cp); 290333d0c60SKonstantin Belousov if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) { 291333d0c60SKonstantin Belousov /* 292333d0c60SKonstantin Belousov * Patch the XSAVE instruction in the cpu_switch code 293333d0c60SKonstantin Belousov * to XSAVEOPT. We assume that XSAVE encoding used 294333d0c60SKonstantin Belousov * REX byte, and set the bit 4 of the r/m byte. 
2959cffc92cSKonstantin Belousov * 2969cffc92cSKonstantin Belousov * It seems that some BIOSes give control to the OS 2979cffc92cSKonstantin Belousov * with CR0.WP already set, making the kernel text 2989cffc92cSKonstantin Belousov * read-only before cpu_startup(). 299333d0c60SKonstantin Belousov */ 3009cffc92cSKonstantin Belousov old_wp = disable_wp(); 301333d0c60SKonstantin Belousov ctx_switch_xsave[3] |= 0x10; 3029cffc92cSKonstantin Belousov restore_wp(old_wp); 303333d0c60SKonstantin Belousov } 3048c6f8f3dSKonstantin Belousov } 3058c6f8f3dSKonstantin Belousov 3068c6f8f3dSKonstantin Belousov /* 3078c6f8f3dSKonstantin Belousov * Calculate the fpu save area size. 3088c6f8f3dSKonstantin Belousov */ 3098c6f8f3dSKonstantin Belousov static void 3108c6f8f3dSKonstantin Belousov fpuinit_bsp2(void) 3118c6f8f3dSKonstantin Belousov { 3128c6f8f3dSKonstantin Belousov u_int cp[4]; 3138c6f8f3dSKonstantin Belousov 3148c6f8f3dSKonstantin Belousov if (use_xsave) { 3158c6f8f3dSKonstantin Belousov cpuid_count(0xd, 0x0, cp); 3168c6f8f3dSKonstantin Belousov cpu_max_ext_state_size = cp[1]; 3178c6f8f3dSKonstantin Belousov 3188c6f8f3dSKonstantin Belousov /* 3198c6f8f3dSKonstantin Belousov * Reload the cpu_feature2, since we enabled OSXSAVE. 3208c6f8f3dSKonstantin Belousov */ 3218c6f8f3dSKonstantin Belousov do_cpuid(1, cp); 3228c6f8f3dSKonstantin Belousov cpu_feature2 = cp[2]; 3238c6f8f3dSKonstantin Belousov } else 3248c6f8f3dSKonstantin Belousov cpu_max_ext_state_size = sizeof(struct savefpu); 3258c6f8f3dSKonstantin Belousov } 3268c6f8f3dSKonstantin Belousov 3278c6f8f3dSKonstantin Belousov /* 3288c6f8f3dSKonstantin Belousov * Initialize the floating point unit. 
329da4113b3SPeter Wemm */ 330398dbb11SPeter Wemm void 3311c89210cSPeter Wemm fpuinit(void) 332da4113b3SPeter Wemm { 3330689bdccSJohn Baldwin register_t saveintr; 33496a7759eSPeter Wemm u_int mxcsr; 335398dbb11SPeter Wemm u_short control; 336da4113b3SPeter Wemm 3378c6f8f3dSKonstantin Belousov if (IS_BSP()) 3388c6f8f3dSKonstantin Belousov fpuinit_bsp1(); 3398c6f8f3dSKonstantin Belousov 3408c6f8f3dSKonstantin Belousov if (use_xsave) { 3418c6f8f3dSKonstantin Belousov load_cr4(rcr4() | CR4_XSAVE); 3427574a595SJohn Baldwin load_xcr(XCR0, xsave_mask); 3438c6f8f3dSKonstantin Belousov } 3448c6f8f3dSKonstantin Belousov 3458c6f8f3dSKonstantin Belousov /* 3468c6f8f3dSKonstantin Belousov * XCR0 shall be set up before CPU can report the save area size. 3478c6f8f3dSKonstantin Belousov */ 3488c6f8f3dSKonstantin Belousov if (IS_BSP()) 3498c6f8f3dSKonstantin Belousov fpuinit_bsp2(); 3508c6f8f3dSKonstantin Belousov 35199753495SKonstantin Belousov /* 35299753495SKonstantin Belousov * It is too early for critical_enter() to work on AP. 35399753495SKonstantin Belousov */ 3540689bdccSJohn Baldwin saveintr = intr_disable(); 3555b81b6b3SRodney W. Grimes stop_emulating(); 3565b81b6b3SRodney W. Grimes fninit(); 357398dbb11SPeter Wemm control = __INITIAL_FPUCW__; 35817275403SJung-uk Kim fldcw(control); 35996a7759eSPeter Wemm mxcsr = __INITIAL_MXCSR__; 36096a7759eSPeter Wemm ldmxcsr(mxcsr); 361a8346a98SJohn Baldwin start_emulating(); 3620689bdccSJohn Baldwin intr_restore(saveintr); 3635b81b6b3SRodney W. Grimes } 3645b81b6b3SRodney W. Grimes 3655b81b6b3SRodney W. Grimes /* 3668c6f8f3dSKonstantin Belousov * On the boot CPU we generate a clean state that is used to 3678c6f8f3dSKonstantin Belousov * initialize the floating point unit when it is first used by a 3688c6f8f3dSKonstantin Belousov * process. 
3698c6f8f3dSKonstantin Belousov */ 3708c6f8f3dSKonstantin Belousov static void 3718c6f8f3dSKonstantin Belousov fpuinitstate(void *arg __unused) 3728c6f8f3dSKonstantin Belousov { 373*fdfe249bSKonstantin Belousov uint64_t *xstate_bv; 3748c6f8f3dSKonstantin Belousov register_t saveintr; 375333d0c60SKonstantin Belousov int cp[4], i, max_ext_n; 3768c6f8f3dSKonstantin Belousov 3778c6f8f3dSKonstantin Belousov fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, 3788c6f8f3dSKonstantin Belousov M_WAITOK | M_ZERO); 3798c6f8f3dSKonstantin Belousov saveintr = intr_disable(); 3808c6f8f3dSKonstantin Belousov stop_emulating(); 3818c6f8f3dSKonstantin Belousov 3828207def1SConrad Meyer fpusave_fxsave(fpu_initialstate); 3838c6f8f3dSKonstantin Belousov if (fpu_initialstate->sv_env.en_mxcsr_mask) 3848c6f8f3dSKonstantin Belousov cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask; 3858c6f8f3dSKonstantin Belousov else 3868c6f8f3dSKonstantin Belousov cpu_mxcsr_mask = 0xFFBF; 3878c6f8f3dSKonstantin Belousov 3888c6f8f3dSKonstantin Belousov /* 389b57e6814SKonstantin Belousov * The fninit instruction does not modify XMM registers or x87 390b57e6814SKonstantin Belousov * registers (MM/ST). The fpusave call dumped the garbage 391b57e6814SKonstantin Belousov * contained in the registers after reset to the initial state 392b57e6814SKonstantin Belousov * saved. Clear XMM and x87 registers file image to make the 393b57e6814SKonstantin Belousov * startup program state and signal handler XMM/x87 register 394b57e6814SKonstantin Belousov * content predictable. 3958c6f8f3dSKonstantin Belousov */ 396b57e6814SKonstantin Belousov bzero(fpu_initialstate->sv_fp, sizeof(fpu_initialstate->sv_fp)); 397b57e6814SKonstantin Belousov bzero(fpu_initialstate->sv_xmm, sizeof(fpu_initialstate->sv_xmm)); 3988c6f8f3dSKonstantin Belousov 399333d0c60SKonstantin Belousov /* 400333d0c60SKonstantin Belousov * Create a table describing the layout of the CPU Extended 401333d0c60SKonstantin Belousov * Save Area. 
402333d0c60SKonstantin Belousov */ 40314f52559SKonstantin Belousov if (use_xsave) { 404*fdfe249bSKonstantin Belousov xstate_bv = (uint64_t *)((char *)(fpu_initialstate + 1) + 405*fdfe249bSKonstantin Belousov offsetof(struct xstate_hdr, xstate_bv)); 406*fdfe249bSKonstantin Belousov *xstate_bv = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; 407*fdfe249bSKonstantin Belousov 408333d0c60SKonstantin Belousov max_ext_n = flsl(xsave_mask); 409333d0c60SKonstantin Belousov xsave_area_desc = malloc(max_ext_n * sizeof(struct 410333d0c60SKonstantin Belousov xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); 411333d0c60SKonstantin Belousov /* x87 state */ 412333d0c60SKonstantin Belousov xsave_area_desc[0].offset = 0; 413333d0c60SKonstantin Belousov xsave_area_desc[0].size = 160; 414333d0c60SKonstantin Belousov /* XMM */ 415333d0c60SKonstantin Belousov xsave_area_desc[1].offset = 160; 416333d0c60SKonstantin Belousov xsave_area_desc[1].size = 288 - 160; 417333d0c60SKonstantin Belousov 418333d0c60SKonstantin Belousov for (i = 2; i < max_ext_n; i++) { 419333d0c60SKonstantin Belousov cpuid_count(0xd, i, cp); 420333d0c60SKonstantin Belousov xsave_area_desc[i].offset = cp[1]; 421333d0c60SKonstantin Belousov xsave_area_desc[i].size = cp[0]; 422333d0c60SKonstantin Belousov } 423333d0c60SKonstantin Belousov } 424333d0c60SKonstantin Belousov 4252741efecSPeter Grehan fpu_save_area_zone = uma_zcreate("FPU_save_area", 4262741efecSPeter Grehan cpu_max_ext_state_size, NULL, NULL, NULL, NULL, 4272741efecSPeter Grehan XSAVE_AREA_ALIGN - 1, 0); 4282741efecSPeter Grehan 4298c6f8f3dSKonstantin Belousov start_emulating(); 4308c6f8f3dSKonstantin Belousov intr_restore(saveintr); 4318c6f8f3dSKonstantin Belousov } 432ad456dd9SKyle Evans /* EFIRT needs this to be initialized before we can enter our EFI environment */ 433ad456dd9SKyle Evans SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_FIRST, fpuinitstate, NULL); 4348c6f8f3dSKonstantin Belousov 4358c6f8f3dSKonstantin Belousov /* 4365b81b6b3SRodney W. 
Grimes * Free coprocessor (if we have it). 4375b81b6b3SRodney W. Grimes */ 4385b81b6b3SRodney W. Grimes void 439bf2f09eeSPeter Wemm fpuexit(struct thread *td) 4405b81b6b3SRodney W. Grimes { 4415b81b6b3SRodney W. Grimes 44299753495SKonstantin Belousov critical_enter(); 4431c89210cSPeter Wemm if (curthread == PCPU_GET(fpcurthread)) { 4441c89210cSPeter Wemm stop_emulating(); 44583b22b05SKonstantin Belousov fpusave(curpcb->pcb_save); 4461c89210cSPeter Wemm start_emulating(); 4476dfc9e44SKonstantin Belousov PCPU_SET(fpcurthread, NULL); 4481c89210cSPeter Wemm } 44999753495SKonstantin Belousov critical_exit(); 4505b81b6b3SRodney W. Grimes } 4515b81b6b3SRodney W. Grimes 45230abe507SJonathan Mini int 453f132cd05SKonstantin Belousov fpuformat(void) 45430abe507SJonathan Mini { 45530abe507SJonathan Mini 45630abe507SJonathan Mini return (_MC_FPFMT_XMM); 45730abe507SJonathan Mini } 45830abe507SJonathan Mini 4595b81b6b3SRodney W. Grimes /* 460a7674320SMartin Cracauer * The following mechanism is used to ensure that the FPE_... value 461a7674320SMartin Cracauer * that is passed as a trapcode to the signal handler of the user 462a7674320SMartin Cracauer * process does not have more than one bit set. 463a7674320SMartin Cracauer * 464a7674320SMartin Cracauer * Multiple bits may be set if the user process modifies the control 465a7674320SMartin Cracauer * word while a status word bit is already set. While this is a sign 466a7674320SMartin Cracauer * of bad coding, we have no choise than to narrow them down to one 467a7674320SMartin Cracauer * bit, since we must not send a trapcode that is not exactly one of 468a7674320SMartin Cracauer * the FPE_ macros. 469a7674320SMartin Cracauer * 470a7674320SMartin Cracauer * The mechanism has a static table with 127 entries. Each combination 471a7674320SMartin Cracauer * of the 7 FPU status word exception bits directly translates to a 472a7674320SMartin Cracauer * position in this table, where a single FPE_... value is stored. 
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The macro to choose one of these values does these steps: 1) Throw
 * away status word bits that cannot be masked.  2) Throw away the bits
 * currently masked in the control word, assuming the user isn't
 * interested in them anymore.  3) Reinsert status word bit 7 (stack
 * fault) if it is set, which cannot be masked but must be preserved.
 * 4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 * 1  Invalid operation (FP_X_INV)
 *   1a   Stack underflow
 *   1b   Stack overflow
 *   1c   Operand of unsupported format
 *   1d   SNaN operand.
492a7674320SMartin Cracauer * 2 QNaN operand (not an exception, irrelavant here) 493a7674320SMartin Cracauer * 3 Any other invalid-operation not mentioned above or zero divide 494a7674320SMartin Cracauer * (FP_X_INV, FP_X_DZ) 495a7674320SMartin Cracauer * 4 Denormal operand (FP_X_DNML) 496a7674320SMartin Cracauer * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) 497784648c6SMartin Cracauer * 6 Inexact result (FP_X_IMP) 498784648c6SMartin Cracauer */ 499a7674320SMartin Cracauer static char fpetable[128] = { 500a7674320SMartin Cracauer 0, 501a7674320SMartin Cracauer FPE_FLTINV, /* 1 - INV */ 502a7674320SMartin Cracauer FPE_FLTUND, /* 2 - DNML */ 503a7674320SMartin Cracauer FPE_FLTINV, /* 3 - INV | DNML */ 504a7674320SMartin Cracauer FPE_FLTDIV, /* 4 - DZ */ 505a7674320SMartin Cracauer FPE_FLTINV, /* 5 - INV | DZ */ 506a7674320SMartin Cracauer FPE_FLTDIV, /* 6 - DNML | DZ */ 507a7674320SMartin Cracauer FPE_FLTINV, /* 7 - INV | DNML | DZ */ 508a7674320SMartin Cracauer FPE_FLTOVF, /* 8 - OFL */ 509a7674320SMartin Cracauer FPE_FLTINV, /* 9 - INV | OFL */ 510a7674320SMartin Cracauer FPE_FLTUND, /* A - DNML | OFL */ 511a7674320SMartin Cracauer FPE_FLTINV, /* B - INV | DNML | OFL */ 512a7674320SMartin Cracauer FPE_FLTDIV, /* C - DZ | OFL */ 513a7674320SMartin Cracauer FPE_FLTINV, /* D - INV | DZ | OFL */ 514a7674320SMartin Cracauer FPE_FLTDIV, /* E - DNML | DZ | OFL */ 515a7674320SMartin Cracauer FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ 516a7674320SMartin Cracauer FPE_FLTUND, /* 10 - UFL */ 517a7674320SMartin Cracauer FPE_FLTINV, /* 11 - INV | UFL */ 518a7674320SMartin Cracauer FPE_FLTUND, /* 12 - DNML | UFL */ 519a7674320SMartin Cracauer FPE_FLTINV, /* 13 - INV | DNML | UFL */ 520a7674320SMartin Cracauer FPE_FLTDIV, /* 14 - DZ | UFL */ 521a7674320SMartin Cracauer FPE_FLTINV, /* 15 - INV | DZ | UFL */ 522a7674320SMartin Cracauer FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ 523a7674320SMartin Cracauer FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ 524a7674320SMartin Cracauer 
FPE_FLTOVF, /* 18 - OFL | UFL */ 525a7674320SMartin Cracauer FPE_FLTINV, /* 19 - INV | OFL | UFL */ 526a7674320SMartin Cracauer FPE_FLTUND, /* 1A - DNML | OFL | UFL */ 527a7674320SMartin Cracauer FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ 528a7674320SMartin Cracauer FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ 529a7674320SMartin Cracauer FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ 530a7674320SMartin Cracauer FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ 531a7674320SMartin Cracauer FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ 532a7674320SMartin Cracauer FPE_FLTRES, /* 20 - IMP */ 533a7674320SMartin Cracauer FPE_FLTINV, /* 21 - INV | IMP */ 534a7674320SMartin Cracauer FPE_FLTUND, /* 22 - DNML | IMP */ 535a7674320SMartin Cracauer FPE_FLTINV, /* 23 - INV | DNML | IMP */ 536a7674320SMartin Cracauer FPE_FLTDIV, /* 24 - DZ | IMP */ 537a7674320SMartin Cracauer FPE_FLTINV, /* 25 - INV | DZ | IMP */ 538a7674320SMartin Cracauer FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ 539a7674320SMartin Cracauer FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ 540a7674320SMartin Cracauer FPE_FLTOVF, /* 28 - OFL | IMP */ 541a7674320SMartin Cracauer FPE_FLTINV, /* 29 - INV | OFL | IMP */ 542a7674320SMartin Cracauer FPE_FLTUND, /* 2A - DNML | OFL | IMP */ 543a7674320SMartin Cracauer FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ 544a7674320SMartin Cracauer FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ 545a7674320SMartin Cracauer FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ 546a7674320SMartin Cracauer FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ 547a7674320SMartin Cracauer FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ 548a7674320SMartin Cracauer FPE_FLTUND, /* 30 - UFL | IMP */ 549a7674320SMartin Cracauer FPE_FLTINV, /* 31 - INV | UFL | IMP */ 550a7674320SMartin Cracauer FPE_FLTUND, /* 32 - DNML | UFL | IMP */ 551a7674320SMartin Cracauer FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ 552a7674320SMartin Cracauer FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ 553a7674320SMartin Cracauer FPE_FLTINV, /* 35 - INV | DZ | 
UFL | IMP */ 554a7674320SMartin Cracauer FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ 555a7674320SMartin Cracauer FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ 556a7674320SMartin Cracauer FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ 557a7674320SMartin Cracauer FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ 558a7674320SMartin Cracauer FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ 559a7674320SMartin Cracauer FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ 560a7674320SMartin Cracauer FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ 561a7674320SMartin Cracauer FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ 562a7674320SMartin Cracauer FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ 563a7674320SMartin Cracauer FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ 564a7674320SMartin Cracauer FPE_FLTSUB, /* 40 - STK */ 565a7674320SMartin Cracauer FPE_FLTSUB, /* 41 - INV | STK */ 566a7674320SMartin Cracauer FPE_FLTUND, /* 42 - DNML | STK */ 567a7674320SMartin Cracauer FPE_FLTSUB, /* 43 - INV | DNML | STK */ 568a7674320SMartin Cracauer FPE_FLTDIV, /* 44 - DZ | STK */ 569a7674320SMartin Cracauer FPE_FLTSUB, /* 45 - INV | DZ | STK */ 570a7674320SMartin Cracauer FPE_FLTDIV, /* 46 - DNML | DZ | STK */ 571a7674320SMartin Cracauer FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ 572a7674320SMartin Cracauer FPE_FLTOVF, /* 48 - OFL | STK */ 573a7674320SMartin Cracauer FPE_FLTSUB, /* 49 - INV | OFL | STK */ 574a7674320SMartin Cracauer FPE_FLTUND, /* 4A - DNML | OFL | STK */ 575a7674320SMartin Cracauer FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ 576a7674320SMartin Cracauer FPE_FLTDIV, /* 4C - DZ | OFL | STK */ 577a7674320SMartin Cracauer FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ 578a7674320SMartin Cracauer FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ 579a7674320SMartin Cracauer FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ 580a7674320SMartin Cracauer FPE_FLTUND, /* 50 - UFL | STK */ 581a7674320SMartin Cracauer FPE_FLTSUB, /* 51 - INV | UFL | STK */ 582a7674320SMartin 
Cracauer FPE_FLTUND, /* 52 - DNML | UFL | STK */ 583a7674320SMartin Cracauer FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ 584a7674320SMartin Cracauer FPE_FLTDIV, /* 54 - DZ | UFL | STK */ 585a7674320SMartin Cracauer FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ 586a7674320SMartin Cracauer FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ 587a7674320SMartin Cracauer FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ 588a7674320SMartin Cracauer FPE_FLTOVF, /* 58 - OFL | UFL | STK */ 589a7674320SMartin Cracauer FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ 590a7674320SMartin Cracauer FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ 591a7674320SMartin Cracauer FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ 592a7674320SMartin Cracauer FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ 593a7674320SMartin Cracauer FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ 594a7674320SMartin Cracauer FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ 595a7674320SMartin Cracauer FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ 596a7674320SMartin Cracauer FPE_FLTRES, /* 60 - IMP | STK */ 597a7674320SMartin Cracauer FPE_FLTSUB, /* 61 - INV | IMP | STK */ 598a7674320SMartin Cracauer FPE_FLTUND, /* 62 - DNML | IMP | STK */ 599a7674320SMartin Cracauer FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ 600a7674320SMartin Cracauer FPE_FLTDIV, /* 64 - DZ | IMP | STK */ 601a7674320SMartin Cracauer FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ 602a7674320SMartin Cracauer FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ 603a7674320SMartin Cracauer FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ 604a7674320SMartin Cracauer FPE_FLTOVF, /* 68 - OFL | IMP | STK */ 605a7674320SMartin Cracauer FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ 606a7674320SMartin Cracauer FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ 607a7674320SMartin Cracauer FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ 608a7674320SMartin Cracauer FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ 609a7674320SMartin Cracauer FPE_FLTSUB, /* 6D - INV 
| DZ | OFL | IMP | STK */ 610a7674320SMartin Cracauer FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ 611a7674320SMartin Cracauer FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ 612a7674320SMartin Cracauer FPE_FLTUND, /* 70 - UFL | IMP | STK */ 613a7674320SMartin Cracauer FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ 614a7674320SMartin Cracauer FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ 615a7674320SMartin Cracauer FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ 616a7674320SMartin Cracauer FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ 617a7674320SMartin Cracauer FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ 618a7674320SMartin Cracauer FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ 619a7674320SMartin Cracauer FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ 620a7674320SMartin Cracauer FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ 621a7674320SMartin Cracauer FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ 622a7674320SMartin Cracauer FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ 623a7674320SMartin Cracauer FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ 624a7674320SMartin Cracauer FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ 625a7674320SMartin Cracauer FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ 626a7674320SMartin Cracauer FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ 627a7674320SMartin Cracauer FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ 628a7674320SMartin Cracauer }; 629a7674320SMartin Cracauer 630a7674320SMartin Cracauer /* 631dfa8a512SKonstantin Belousov * Read the FP status and control words, then generate si_code value 632dfa8a512SKonstantin Belousov * for SIGFPE. The error code chosen will be one of the 633dfa8a512SKonstantin Belousov * FPE_... macros. It will be sent as the second argument to old 634dfa8a512SKonstantin Belousov * BSD-style signal handlers and as "siginfo_t->si_code" (second 635dfa8a512SKonstantin Belousov * argument) to SA_SIGINFO signal handlers. 
6365b81b6b3SRodney W. Grimes * 637dfa8a512SKonstantin Belousov * Some time ago, we cleared the x87 exceptions with FNCLEX there. 638dfa8a512SKonstantin Belousov * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The 639dfa8a512SKonstantin Belousov * usermode code which understands the FPU hardware enough to enable 640dfa8a512SKonstantin Belousov * the exceptions, can also handle clearing the exception state in the 641dfa8a512SKonstantin Belousov * handler. The only consequence of not clearing the exception is the 642dfa8a512SKonstantin Belousov * rethrow of the SIGFPE on return from the signal handler and 643dfa8a512SKonstantin Belousov * reexecution of the corresponding instruction. 644bc84db62SKonstantin Belousov * 645dfa8a512SKonstantin Belousov * For XMM traps, the exceptions were never cleared. 6465b81b6b3SRodney W. Grimes */ 6471c1771cbSBruce Evans int 648bc84db62SKonstantin Belousov fputrap_x87(void) 6495b81b6b3SRodney W. Grimes { 650bc84db62SKonstantin Belousov struct savefpu *pcb_save; 6511c1771cbSBruce Evans u_short control, status; 6525b81b6b3SRodney W. Grimes 65399753495SKonstantin Belousov critical_enter(); 6545b81b6b3SRodney W. Grimes 6555b81b6b3SRodney W. Grimes /* 6561c1771cbSBruce Evans * Interrupt handling (for another interrupt) may have pushed the 6571c1771cbSBruce Evans * state to memory. Fetch the relevant parts of the state from 6581c1771cbSBruce Evans * wherever they are. 6595b81b6b3SRodney W. Grimes */ 6600bbc8826SJohn Baldwin if (PCPU_GET(fpcurthread) != curthread) { 66183b22b05SKonstantin Belousov pcb_save = curpcb->pcb_save; 662bc84db62SKonstantin Belousov control = pcb_save->sv_env.en_cw; 663bc84db62SKonstantin Belousov status = pcb_save->sv_env.en_sw; 6645b81b6b3SRodney W. Grimes } else { 6651c1771cbSBruce Evans fnstcw(&control); 6661c1771cbSBruce Evans fnstsw(&status); 6675b81b6b3SRodney W. 
Grimes } 6681c1771cbSBruce Evans 66999753495SKonstantin Belousov critical_exit(); 6701c1771cbSBruce Evans return (fpetable[status & ((~control & 0x3f) | 0x40)]); 6715b81b6b3SRodney W. Grimes } 6725b81b6b3SRodney W. Grimes 673bc84db62SKonstantin Belousov int 674bc84db62SKonstantin Belousov fputrap_sse(void) 675bc84db62SKonstantin Belousov { 676bc84db62SKonstantin Belousov u_int mxcsr; 677bc84db62SKonstantin Belousov 678bc84db62SKonstantin Belousov critical_enter(); 679bc84db62SKonstantin Belousov if (PCPU_GET(fpcurthread) != curthread) 68083b22b05SKonstantin Belousov mxcsr = curpcb->pcb_save->sv_env.en_mxcsr; 681bc84db62SKonstantin Belousov else 682bc84db62SKonstantin Belousov stmxcsr(&mxcsr); 683bc84db62SKonstantin Belousov critical_exit(); 684bc84db62SKonstantin Belousov return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); 685bc84db62SKonstantin Belousov } 686bc84db62SKonstantin Belousov 687d1a07e31SKonstantin Belousov static void 688d1a07e31SKonstantin Belousov restore_fpu_curthread(struct thread *td) 689d1a07e31SKonstantin Belousov { 690d1a07e31SKonstantin Belousov struct pcb *pcb; 691d1a07e31SKonstantin Belousov 692d1a07e31SKonstantin Belousov /* 693d1a07e31SKonstantin Belousov * Record new context early in case frstor causes a trap. 694d1a07e31SKonstantin Belousov */ 695d1a07e31SKonstantin Belousov PCPU_SET(fpcurthread, td); 696d1a07e31SKonstantin Belousov 697d1a07e31SKonstantin Belousov stop_emulating(); 698d1a07e31SKonstantin Belousov fpu_clean_state(); 699d1a07e31SKonstantin Belousov pcb = td->td_pcb; 700d1a07e31SKonstantin Belousov 701d1a07e31SKonstantin Belousov if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { 702d1a07e31SKonstantin Belousov /* 703d1a07e31SKonstantin Belousov * This is the first time this thread has used the FPU or 704d1a07e31SKonstantin Belousov * the PCB doesn't contain a clean FPU state. Explicitly 705d1a07e31SKonstantin Belousov * load an initial state. 
706d1a07e31SKonstantin Belousov * 707d1a07e31SKonstantin Belousov * We prefer to restore the state from the actual save 708d1a07e31SKonstantin Belousov * area in PCB instead of directly loading from 709d1a07e31SKonstantin Belousov * fpu_initialstate, to ignite the XSAVEOPT 710d1a07e31SKonstantin Belousov * tracking engine. 711d1a07e31SKonstantin Belousov */ 712d1a07e31SKonstantin Belousov bcopy(fpu_initialstate, pcb->pcb_save, 713d1a07e31SKonstantin Belousov cpu_max_ext_state_size); 714d1a07e31SKonstantin Belousov fpurestore(pcb->pcb_save); 715d1a07e31SKonstantin Belousov if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) 716d1a07e31SKonstantin Belousov fldcw(pcb->pcb_initial_fpucw); 717d1a07e31SKonstantin Belousov if (PCB_USER_FPU(pcb)) 718d1a07e31SKonstantin Belousov set_pcb_flags(pcb, PCB_FPUINITDONE | 719d1a07e31SKonstantin Belousov PCB_USERFPUINITDONE); 720d1a07e31SKonstantin Belousov else 721d1a07e31SKonstantin Belousov set_pcb_flags(pcb, PCB_FPUINITDONE); 722d1a07e31SKonstantin Belousov } else 723d1a07e31SKonstantin Belousov fpurestore(pcb->pcb_save); 724d1a07e31SKonstantin Belousov } 725d1a07e31SKonstantin Belousov 7266dfc9e44SKonstantin Belousov /* 7276dfc9e44SKonstantin Belousov * Device Not Available (DNA, #NM) exception handler. 7286dfc9e44SKonstantin Belousov * 7296dfc9e44SKonstantin Belousov * It would be better to switch FP context here (if curthread != 7306dfc9e44SKonstantin Belousov * fpcurthread) and not necessarily for every context switch, but it 7316dfc9e44SKonstantin Belousov * is too hard to access foreign pcb's. 7326dfc9e44SKonstantin Belousov */ 733a8346a98SJohn Baldwin void 734a8346a98SJohn Baldwin fpudna(void) 7355b81b6b3SRodney W. 
Grimes { 736d1a07e31SKonstantin Belousov struct thread *td; 73705f6ee66SJake Burkholder 738d1a07e31SKonstantin Belousov td = curthread; 739060cd4d5SKonstantin Belousov /* 740060cd4d5SKonstantin Belousov * This handler is entered with interrupts enabled, so context 741060cd4d5SKonstantin Belousov * switches may occur before critical_enter() is executed. If 742060cd4d5SKonstantin Belousov * a context switch occurs, then when we regain control, our 743060cd4d5SKonstantin Belousov * state will have been completely restored. The CPU may 744060cd4d5SKonstantin Belousov * change underneath us, but the only part of our context that 745060cd4d5SKonstantin Belousov * lives in the CPU is CR0.TS and that will be "restored" by 746060cd4d5SKonstantin Belousov * setting it on the new CPU. 747060cd4d5SKonstantin Belousov */ 74899753495SKonstantin Belousov critical_enter(); 749060cd4d5SKonstantin Belousov 750cf1c4776SKonstantin Belousov KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0, 751cf1c4776SKonstantin Belousov ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)")); 7525803d744SKonstantin Belousov if (__predict_false(PCPU_GET(fpcurthread) == td)) { 753fa7fad8aSKonstantin Belousov /* 754fa7fad8aSKonstantin Belousov * Some virtual machines seems to set %cr0.TS at 755fa7fad8aSKonstantin Belousov * arbitrary moments. Silently clear the TS bit 756fa7fad8aSKonstantin Belousov * regardless of the eager/lazy FPU context switch 757fa7fad8aSKonstantin Belousov * mode. 758fa7fad8aSKonstantin Belousov */ 75930abe507SJonathan Mini stop_emulating(); 7605803d744SKonstantin Belousov } else { 7615803d744SKonstantin Belousov if (__predict_false(PCPU_GET(fpcurthread) != NULL)) { 7625803d744SKonstantin Belousov panic( 7635803d744SKonstantin Belousov "fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", 7645803d744SKonstantin Belousov PCPU_GET(fpcurthread), 7655803d744SKonstantin Belousov PCPU_GET(fpcurthread)->td_tid, td, td->td_tid); 7665b81b6b3SRodney W. 
Grimes } 767d1a07e31SKonstantin Belousov restore_fpu_curthread(td); 7685803d744SKonstantin Belousov } 76999753495SKonstantin Belousov critical_exit(); 7705b81b6b3SRodney W. Grimes } 7715b81b6b3SRodney W. Grimes 772d1a07e31SKonstantin Belousov void fpu_activate_sw(struct thread *td); /* Called from the context switch */ 773d1a07e31SKonstantin Belousov void 774d1a07e31SKonstantin Belousov fpu_activate_sw(struct thread *td) 775d1a07e31SKonstantin Belousov { 776d1a07e31SKonstantin Belousov 777d1a07e31SKonstantin Belousov if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 || 778d1a07e31SKonstantin Belousov !PCB_USER_FPU(td->td_pcb)) { 779d1a07e31SKonstantin Belousov PCPU_SET(fpcurthread, NULL); 780d1a07e31SKonstantin Belousov start_emulating(); 781d1a07e31SKonstantin Belousov } else if (PCPU_GET(fpcurthread) != td) { 782d1a07e31SKonstantin Belousov restore_fpu_curthread(td); 783d1a07e31SKonstantin Belousov } 784d1a07e31SKonstantin Belousov } 785d1a07e31SKonstantin Belousov 78630abe507SJonathan Mini void 787f132cd05SKonstantin Belousov fpudrop(void) 78830abe507SJonathan Mini { 78930abe507SJonathan Mini struct thread *td; 79030abe507SJonathan Mini 79130abe507SJonathan Mini td = PCPU_GET(fpcurthread); 79299753495SKonstantin Belousov KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); 7934a23ecc7SKonstantin Belousov CRITICAL_ASSERT(td); 79430abe507SJonathan Mini PCPU_SET(fpcurthread, NULL); 795e6c006d9SJung-uk Kim clear_pcb_flags(td->td_pcb, PCB_FPUINITDONE); 79630abe507SJonathan Mini start_emulating(); 79730abe507SJonathan Mini } 79830abe507SJonathan Mini 79930abe507SJonathan Mini /* 8005c6eb037SKonstantin Belousov * Get the user state of the FPU into pcb->pcb_user_save without 8015c6eb037SKonstantin Belousov * dropping ownership (if possible). It returns the FPU ownership 8025c6eb037SKonstantin Belousov * status. 
80330abe507SJonathan Mini */ 80430abe507SJonathan Mini int 8055c6eb037SKonstantin Belousov fpugetregs(struct thread *td) 8066cf9a08dSKonstantin Belousov { 8076cf9a08dSKonstantin Belousov struct pcb *pcb; 808333d0c60SKonstantin Belousov uint64_t *xstate_bv, bit; 809333d0c60SKonstantin Belousov char *sa; 81014f52559SKonstantin Belousov int max_ext_n, i, owned; 8116cf9a08dSKonstantin Belousov 8126cf9a08dSKonstantin Belousov pcb = td->td_pcb; 81341bed185SKonstantin Belousov critical_enter(); 8146cf9a08dSKonstantin Belousov if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { 8158c6f8f3dSKonstantin Belousov bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb), 8168c6f8f3dSKonstantin Belousov cpu_max_ext_state_size); 8178c6f8f3dSKonstantin Belousov get_pcb_user_save_pcb(pcb)->sv_env.en_cw = 8188c6f8f3dSKonstantin Belousov pcb->pcb_initial_fpucw; 8195c6eb037SKonstantin Belousov fpuuserinited(td); 82041bed185SKonstantin Belousov critical_exit(); 8215c6eb037SKonstantin Belousov return (_MC_FPOWNED_PCB); 8226cf9a08dSKonstantin Belousov } 8236cf9a08dSKonstantin Belousov if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { 8248c6f8f3dSKonstantin Belousov fpusave(get_pcb_user_save_pcb(pcb)); 82514f52559SKonstantin Belousov owned = _MC_FPOWNED_FPU; 8266cf9a08dSKonstantin Belousov } else { 82714f52559SKonstantin Belousov owned = _MC_FPOWNED_PCB; 82814f52559SKonstantin Belousov } 82914f52559SKonstantin Belousov if (use_xsave) { 830333d0c60SKonstantin Belousov /* 831333d0c60SKonstantin Belousov * Handle partially saved state. 
832333d0c60SKonstantin Belousov */ 833333d0c60SKonstantin Belousov sa = (char *)get_pcb_user_save_pcb(pcb); 834333d0c60SKonstantin Belousov xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) + 835333d0c60SKonstantin Belousov offsetof(struct xstate_hdr, xstate_bv)); 836333d0c60SKonstantin Belousov max_ext_n = flsl(xsave_mask); 837333d0c60SKonstantin Belousov for (i = 0; i < max_ext_n; i++) { 838241b67bbSKonstantin Belousov bit = 1ULL << i; 839241b67bbSKonstantin Belousov if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) 840333d0c60SKonstantin Belousov continue; 841333d0c60SKonstantin Belousov bcopy((char *)fpu_initialstate + 842333d0c60SKonstantin Belousov xsave_area_desc[i].offset, 843333d0c60SKonstantin Belousov sa + xsave_area_desc[i].offset, 844333d0c60SKonstantin Belousov xsave_area_desc[i].size); 845333d0c60SKonstantin Belousov *xstate_bv |= bit; 846333d0c60SKonstantin Belousov } 847333d0c60SKonstantin Belousov } 84841bed185SKonstantin Belousov critical_exit(); 84914f52559SKonstantin Belousov return (owned); 8506cf9a08dSKonstantin Belousov } 8516cf9a08dSKonstantin Belousov 8525c6eb037SKonstantin Belousov void 8535c6eb037SKonstantin Belousov fpuuserinited(struct thread *td) 85430abe507SJonathan Mini { 8556cf9a08dSKonstantin Belousov struct pcb *pcb; 85630abe507SJonathan Mini 85741bed185SKonstantin Belousov CRITICAL_ASSERT(td); 8586cf9a08dSKonstantin Belousov pcb = td->td_pcb; 8595c6eb037SKonstantin Belousov if (PCB_USER_FPU(pcb)) 860e6c006d9SJung-uk Kim set_pcb_flags(pcb, 861e6c006d9SJung-uk Kim PCB_FPUINITDONE | PCB_USERFPUINITDONE); 862e6c006d9SJung-uk Kim else 863e6c006d9SJung-uk Kim set_pcb_flags(pcb, PCB_FPUINITDONE); 86430abe507SJonathan Mini } 86530abe507SJonathan Mini 8668c6f8f3dSKonstantin Belousov int 8678c6f8f3dSKonstantin Belousov fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) 8688c6f8f3dSKonstantin Belousov { 8698c6f8f3dSKonstantin Belousov struct xstate_hdr *hdr, *ehdr; 8708c6f8f3dSKonstantin Belousov size_t len, 
max_len; 8718c6f8f3dSKonstantin Belousov uint64_t bv; 8728c6f8f3dSKonstantin Belousov 8738c6f8f3dSKonstantin Belousov /* XXXKIB should we clear all extended state in xstate_bv instead ? */ 8748c6f8f3dSKonstantin Belousov if (xfpustate == NULL) 8758c6f8f3dSKonstantin Belousov return (0); 8768c6f8f3dSKonstantin Belousov if (!use_xsave) 8778c6f8f3dSKonstantin Belousov return (EOPNOTSUPP); 8788c6f8f3dSKonstantin Belousov 8798c6f8f3dSKonstantin Belousov len = xfpustate_size; 8808c6f8f3dSKonstantin Belousov if (len < sizeof(struct xstate_hdr)) 8818c6f8f3dSKonstantin Belousov return (EINVAL); 8828c6f8f3dSKonstantin Belousov max_len = cpu_max_ext_state_size - sizeof(struct savefpu); 8838c6f8f3dSKonstantin Belousov if (len > max_len) 8848c6f8f3dSKonstantin Belousov return (EINVAL); 8858c6f8f3dSKonstantin Belousov 8868c6f8f3dSKonstantin Belousov ehdr = (struct xstate_hdr *)xfpustate; 8878c6f8f3dSKonstantin Belousov bv = ehdr->xstate_bv; 8888c6f8f3dSKonstantin Belousov 8898c6f8f3dSKonstantin Belousov /* 8908c6f8f3dSKonstantin Belousov * Avoid #gp. 8918c6f8f3dSKonstantin Belousov */ 8928c6f8f3dSKonstantin Belousov if (bv & ~xsave_mask) 8938c6f8f3dSKonstantin Belousov return (EINVAL); 8948c6f8f3dSKonstantin Belousov 8958c6f8f3dSKonstantin Belousov hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); 8968c6f8f3dSKonstantin Belousov 8978c6f8f3dSKonstantin Belousov hdr->xstate_bv = bv; 8988c6f8f3dSKonstantin Belousov bcopy(xfpustate + sizeof(struct xstate_hdr), 8998c6f8f3dSKonstantin Belousov (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); 9008c6f8f3dSKonstantin Belousov 9018c6f8f3dSKonstantin Belousov return (0); 9028c6f8f3dSKonstantin Belousov } 9038c6f8f3dSKonstantin Belousov 90430abe507SJonathan Mini /* 90530abe507SJonathan Mini * Set the state of the FPU. 
90630abe507SJonathan Mini */ 9078c6f8f3dSKonstantin Belousov int 9088c6f8f3dSKonstantin Belousov fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, 9098c6f8f3dSKonstantin Belousov size_t xfpustate_size) 9106cf9a08dSKonstantin Belousov { 9116cf9a08dSKonstantin Belousov struct pcb *pcb; 9128c6f8f3dSKonstantin Belousov int error; 9136cf9a08dSKonstantin Belousov 914aa788cc3SKonstantin Belousov addr->sv_env.en_mxcsr &= cpu_mxcsr_mask; 9156cf9a08dSKonstantin Belousov pcb = td->td_pcb; 91641bed185SKonstantin Belousov error = 0; 91799753495SKonstantin Belousov critical_enter(); 9186cf9a08dSKonstantin Belousov if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { 9198c6f8f3dSKonstantin Belousov error = fpusetxstate(td, xfpustate, xfpustate_size); 92041bed185SKonstantin Belousov if (error == 0) { 9218c6f8f3dSKonstantin Belousov bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); 9228c6f8f3dSKonstantin Belousov fpurestore(get_pcb_user_save_td(td)); 92341bed185SKonstantin Belousov set_pcb_flags(pcb, PCB_FPUINITDONE | 92441bed185SKonstantin Belousov PCB_USERFPUINITDONE); 92541bed185SKonstantin Belousov } 9266cf9a08dSKonstantin Belousov } else { 9278c6f8f3dSKonstantin Belousov error = fpusetxstate(td, xfpustate, xfpustate_size); 92841bed185SKonstantin Belousov if (error == 0) { 9298c6f8f3dSKonstantin Belousov bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); 9305c6eb037SKonstantin Belousov fpuuserinited(td); 9316cf9a08dSKonstantin Belousov } 93241bed185SKonstantin Belousov } 93341bed185SKonstantin Belousov critical_exit(); 93441bed185SKonstantin Belousov return (error); 9356cf9a08dSKonstantin Belousov } 9366cf9a08dSKonstantin Belousov 9376182fdbdSPeter Wemm /* 9382652af56SColin Percival * On AuthenticAMD processors, the fxrstor instruction does not restore 9392652af56SColin Percival * the x87's stored last instruction pointer, last data pointer, and last 9402652af56SColin Percival * opcode values, except in the rare case in which the exception summary 
9412652af56SColin Percival * (ES) bit in the x87 status word is set to 1. 9422652af56SColin Percival * 9432652af56SColin Percival * In order to avoid leaking this information across processes, we clean 9442652af56SColin Percival * these values by performing a dummy load before executing fxrstor(). 9452652af56SColin Percival */ 9462652af56SColin Percival static void 9472652af56SColin Percival fpu_clean_state(void) 9482652af56SColin Percival { 949b9dda9d6SJohn Baldwin static float dummy_variable = 0.0; 9502652af56SColin Percival u_short status; 9512652af56SColin Percival 9522652af56SColin Percival /* 9532652af56SColin Percival * Clear the ES bit in the x87 status word if it is currently 9542652af56SColin Percival * set, in order to avoid causing a fault in the upcoming load. 9552652af56SColin Percival */ 9562652af56SColin Percival fnstsw(&status); 9572652af56SColin Percival if (status & 0x80) 9582652af56SColin Percival fnclex(); 9592652af56SColin Percival 9602652af56SColin Percival /* 9612652af56SColin Percival * Load the dummy variable into the x87 stack. This mangles 9622652af56SColin Percival * the x87 stack, but we don't care since we're about to call 9632652af56SColin Percival * fxrstor() anyway. 9642652af56SColin Percival */ 96514965052SDimitry Andric __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); 9662652af56SColin Percival } 9672652af56SColin Percival 9682652af56SColin Percival /* 969398dbb11SPeter Wemm * This really sucks. We want the acpi version only, but it requires 970398dbb11SPeter Wemm * the isa_if.h file in order to get the definitions. 9716182fdbdSPeter Wemm */ 972398dbb11SPeter Wemm #include "opt_isa.h" 973afa88623SPeter Wemm #ifdef DEV_ISA 974398dbb11SPeter Wemm #include <isa/isavar.h> 97554f1d0ceSGarrett Wollman /* 9765f063c7bSMike Smith * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. 
97754f1d0ceSGarrett Wollman */ 978398dbb11SPeter Wemm static struct isa_pnp_id fpupnp_ids[] = { 97954f1d0ceSGarrett Wollman { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ 98054f1d0ceSGarrett Wollman { 0 } 98154f1d0ceSGarrett Wollman }; 98254f1d0ceSGarrett Wollman 98354f1d0ceSGarrett Wollman static int 984398dbb11SPeter Wemm fpupnp_probe(device_t dev) 98554f1d0ceSGarrett Wollman { 986bb9c06c1SMike Smith int result; 987bf2f09eeSPeter Wemm 988398dbb11SPeter Wemm result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids); 989bf2f09eeSPeter Wemm if (result <= 0) 990bb9c06c1SMike Smith device_quiet(dev); 991bb9c06c1SMike Smith return (result); 99254f1d0ceSGarrett Wollman } 99354f1d0ceSGarrett Wollman 99454f1d0ceSGarrett Wollman static int 995398dbb11SPeter Wemm fpupnp_attach(device_t dev) 99654f1d0ceSGarrett Wollman { 997bf2f09eeSPeter Wemm 99854f1d0ceSGarrett Wollman return (0); 99954f1d0ceSGarrett Wollman } 100054f1d0ceSGarrett Wollman 1001398dbb11SPeter Wemm static device_method_t fpupnp_methods[] = { 100254f1d0ceSGarrett Wollman /* Device interface */ 1003398dbb11SPeter Wemm DEVMETHOD(device_probe, fpupnp_probe), 1004398dbb11SPeter Wemm DEVMETHOD(device_attach, fpupnp_attach), 100554f1d0ceSGarrett Wollman DEVMETHOD(device_detach, bus_generic_detach), 100654f1d0ceSGarrett Wollman DEVMETHOD(device_shutdown, bus_generic_shutdown), 100754f1d0ceSGarrett Wollman DEVMETHOD(device_suspend, bus_generic_suspend), 100854f1d0ceSGarrett Wollman DEVMETHOD(device_resume, bus_generic_resume), 100954f1d0ceSGarrett Wollman 101054f1d0ceSGarrett Wollman { 0, 0 } 101154f1d0ceSGarrett Wollman }; 101254f1d0ceSGarrett Wollman 1013398dbb11SPeter Wemm static driver_t fpupnp_driver = { 1014398dbb11SPeter Wemm "fpupnp", 1015398dbb11SPeter Wemm fpupnp_methods, 101654f1d0ceSGarrett Wollman 1, /* no softc */ 101754f1d0ceSGarrett Wollman }; 101854f1d0ceSGarrett Wollman 1019398dbb11SPeter Wemm static devclass_t fpupnp_devclass; 102054f1d0ceSGarrett Wollman 1021398dbb11SPeter 
Wemm DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); 1022d6b66397SWarner Losh ISA_PNP_INFO(fpupnp_ids); 1023586079ccSBruce Evans #endif /* DEV_ISA */ 10246cf9a08dSKonstantin Belousov 10258c6f8f3dSKonstantin Belousov static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", 10268c6f8f3dSKonstantin Belousov "Kernel contexts for FPU state"); 10278c6f8f3dSKonstantin Belousov 10288c6f8f3dSKonstantin Belousov #define FPU_KERN_CTX_FPUINITDONE 0x01 1029633034feSKonstantin Belousov #define FPU_KERN_CTX_DUMMY 0x02 /* avoided save for the kern thread */ 1030e808e13bSJohn-Mark Gurney #define FPU_KERN_CTX_INUSE 0x04 10318c6f8f3dSKonstantin Belousov 10328c6f8f3dSKonstantin Belousov struct fpu_kern_ctx { 10338c6f8f3dSKonstantin Belousov struct savefpu *prev; 10348c6f8f3dSKonstantin Belousov uint32_t flags; 10358c6f8f3dSKonstantin Belousov char hwstate1[]; 10368c6f8f3dSKonstantin Belousov }; 10378c6f8f3dSKonstantin Belousov 10388c6f8f3dSKonstantin Belousov struct fpu_kern_ctx * 10398c6f8f3dSKonstantin Belousov fpu_kern_alloc_ctx(u_int flags) 10408c6f8f3dSKonstantin Belousov { 10418c6f8f3dSKonstantin Belousov struct fpu_kern_ctx *res; 10428c6f8f3dSKonstantin Belousov size_t sz; 10438c6f8f3dSKonstantin Belousov 10448c6f8f3dSKonstantin Belousov sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + 10458c6f8f3dSKonstantin Belousov cpu_max_ext_state_size; 10468c6f8f3dSKonstantin Belousov res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? 10478c6f8f3dSKonstantin Belousov M_NOWAIT : M_WAITOK) | M_ZERO); 10488c6f8f3dSKonstantin Belousov return (res); 10498c6f8f3dSKonstantin Belousov } 10508c6f8f3dSKonstantin Belousov 10518c6f8f3dSKonstantin Belousov void 10528c6f8f3dSKonstantin Belousov fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) 10538c6f8f3dSKonstantin Belousov { 10548c6f8f3dSKonstantin Belousov 1055e808e13bSJohn-Mark Gurney KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); 10568c6f8f3dSKonstantin Belousov /* XXXKIB clear the memory ? 
*/ 10578c6f8f3dSKonstantin Belousov free(ctx, M_FPUKERN_CTX); 10588c6f8f3dSKonstantin Belousov } 10598c6f8f3dSKonstantin Belousov 10608c6f8f3dSKonstantin Belousov static struct savefpu * 10618c6f8f3dSKonstantin Belousov fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) 10628c6f8f3dSKonstantin Belousov { 10638c6f8f3dSKonstantin Belousov vm_offset_t p; 10648c6f8f3dSKonstantin Belousov 10658c6f8f3dSKonstantin Belousov p = (vm_offset_t)&ctx->hwstate1; 10668c6f8f3dSKonstantin Belousov p = roundup2(p, XSAVE_AREA_ALIGN); 10678c6f8f3dSKonstantin Belousov return ((struct savefpu *)p); 10688c6f8f3dSKonstantin Belousov } 10698c6f8f3dSKonstantin Belousov 1070849ce31aSConrad Meyer void 10716cf9a08dSKonstantin Belousov fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) 10726cf9a08dSKonstantin Belousov { 10736cf9a08dSKonstantin Belousov struct pcb *pcb; 10746cf9a08dSKonstantin Belousov 1075cf1c4776SKonstantin Belousov pcb = td->td_pcb; 1076cf1c4776SKonstantin Belousov KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL, 1077cf1c4776SKonstantin Belousov ("ctx is required when !FPU_KERN_NOCTX")); 1078cf1c4776SKonstantin Belousov KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0, 1079cf1c4776SKonstantin Belousov ("using inuse ctx")); 1080cf1c4776SKonstantin Belousov KASSERT((pcb->pcb_flags & PCB_FPUNOSAVE) == 0, 1081cf1c4776SKonstantin Belousov ("recursive fpu_kern_enter while in PCB_FPUNOSAVE state")); 1082e808e13bSJohn-Mark Gurney 1083cf1c4776SKonstantin Belousov if ((flags & FPU_KERN_NOCTX) != 0) { 1084cf1c4776SKonstantin Belousov critical_enter(); 1085cf1c4776SKonstantin Belousov stop_emulating(); 1086cf1c4776SKonstantin Belousov if (curthread == PCPU_GET(fpcurthread)) { 1087cf1c4776SKonstantin Belousov fpusave(curpcb->pcb_save); 1088cf1c4776SKonstantin Belousov PCPU_SET(fpcurthread, NULL); 1089cf1c4776SKonstantin Belousov } else { 1090cf1c4776SKonstantin Belousov KASSERT(PCPU_GET(fpcurthread) == NULL, 1091cf1c4776SKonstantin Belousov ("invalid 
fpcurthread")); 1092cf1c4776SKonstantin Belousov } 1093cf1c4776SKonstantin Belousov 1094cf1c4776SKonstantin Belousov /* 1095cf1c4776SKonstantin Belousov * This breaks XSAVEOPT tracker, but 1096cf1c4776SKonstantin Belousov * PCB_FPUNOSAVE state is supposed to never need to 1097cf1c4776SKonstantin Belousov * save FPU context at all. 1098cf1c4776SKonstantin Belousov */ 1099cf1c4776SKonstantin Belousov fpurestore(fpu_initialstate); 1100cf1c4776SKonstantin Belousov set_pcb_flags(pcb, PCB_KERNFPU | PCB_FPUNOSAVE | 1101cf1c4776SKonstantin Belousov PCB_FPUINITDONE); 1102849ce31aSConrad Meyer return; 1103cf1c4776SKonstantin Belousov } 1104633034feSKonstantin Belousov if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { 1105e808e13bSJohn-Mark Gurney ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; 1106849ce31aSConrad Meyer return; 1107633034feSKonstantin Belousov } 110841bed185SKonstantin Belousov critical_enter(); 11098c6f8f3dSKonstantin Belousov KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == 11108c6f8f3dSKonstantin Belousov get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); 1111e808e13bSJohn-Mark Gurney ctx->flags = FPU_KERN_CTX_INUSE; 11126cf9a08dSKonstantin Belousov if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) 11136cf9a08dSKonstantin Belousov ctx->flags |= FPU_KERN_CTX_FPUINITDONE; 11146cf9a08dSKonstantin Belousov fpuexit(td); 11156cf9a08dSKonstantin Belousov ctx->prev = pcb->pcb_save; 11168c6f8f3dSKonstantin Belousov pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); 1117e6c006d9SJung-uk Kim set_pcb_flags(pcb, PCB_KERNFPU); 1118e6c006d9SJung-uk Kim clear_pcb_flags(pcb, PCB_FPUINITDONE); 111941bed185SKonstantin Belousov critical_exit(); 11206cf9a08dSKonstantin Belousov } 11216cf9a08dSKonstantin Belousov 11226cf9a08dSKonstantin Belousov int 11236cf9a08dSKonstantin Belousov fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) 11246cf9a08dSKonstantin Belousov { 11256cf9a08dSKonstantin Belousov struct pcb *pcb; 11266cf9a08dSKonstantin Belousov 
1127cf1c4776SKonstantin Belousov pcb = td->td_pcb; 1128cf1c4776SKonstantin Belousov 1129cf1c4776SKonstantin Belousov if ((pcb->pcb_flags & PCB_FPUNOSAVE) != 0) { 1130cf1c4776SKonstantin Belousov KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX")); 1131cf1c4776SKonstantin Belousov KASSERT(PCPU_GET(fpcurthread) == NULL, 1132cf1c4776SKonstantin Belousov ("non-NULL fpcurthread for PCB_FPUNOSAVE")); 1133cf1c4776SKonstantin Belousov CRITICAL_ASSERT(td); 1134cf1c4776SKonstantin Belousov 1135cf1c4776SKonstantin Belousov clear_pcb_flags(pcb, PCB_FPUNOSAVE | PCB_FPUINITDONE); 1136cf1c4776SKonstantin Belousov start_emulating(); 1137cf1c4776SKonstantin Belousov } else { 1138e808e13bSJohn-Mark Gurney KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, 1139e808e13bSJohn-Mark Gurney ("leaving not inuse ctx")); 1140e808e13bSJohn-Mark Gurney ctx->flags &= ~FPU_KERN_CTX_INUSE; 1141e808e13bSJohn-Mark Gurney 1142cf1c4776SKonstantin Belousov if (is_fpu_kern_thread(0) && 1143cf1c4776SKonstantin Belousov (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) 1144633034feSKonstantin Belousov return (0); 1145cf1c4776SKonstantin Belousov KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, 1146cf1c4776SKonstantin Belousov ("dummy ctx")); 114799753495SKonstantin Belousov critical_enter(); 11486cf9a08dSKonstantin Belousov if (curthread == PCPU_GET(fpcurthread)) 11496cf9a08dSKonstantin Belousov fpudrop(); 11506cf9a08dSKonstantin Belousov pcb->pcb_save = ctx->prev; 1151cf1c4776SKonstantin Belousov } 1152cf1c4776SKonstantin Belousov 11538c6f8f3dSKonstantin Belousov if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { 1154e6c006d9SJung-uk Kim if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) { 1155e6c006d9SJung-uk Kim set_pcb_flags(pcb, PCB_FPUINITDONE); 1156e6c006d9SJung-uk Kim clear_pcb_flags(pcb, PCB_KERNFPU); 1157e6c006d9SJung-uk Kim } else 1158e6c006d9SJung-uk Kim clear_pcb_flags(pcb, PCB_FPUINITDONE | PCB_KERNFPU); 11596cf9a08dSKonstantin Belousov } else { 11606cf9a08dSKonstantin Belousov if ((ctx->flags & 
FPU_KERN_CTX_FPUINITDONE) != 0) 1161e6c006d9SJung-uk Kim set_pcb_flags(pcb, PCB_FPUINITDONE); 11626cf9a08dSKonstantin Belousov else 1163e6c006d9SJung-uk Kim clear_pcb_flags(pcb, PCB_FPUINITDONE); 11646cf9a08dSKonstantin Belousov KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); 11656cf9a08dSKonstantin Belousov } 116641bed185SKonstantin Belousov /* Ends the critical section: the one entered earlier in this function on the ctx path, or the one CRITICAL_ASSERT() checked on the PCB_FPUNOSAVE path. */ critical_exit(); 11676cf9a08dSKonstantin Belousov return (0); 11686cf9a08dSKonstantin Belousov } 11696cf9a08dSKonstantin Belousov 11706cf9a08dSKonstantin Belousov /* fpu_kern_thread: set PCB_KERNFPU in curpcb. Asserts that curthread has TDP_KTHREAD, that curpcb->pcb_save is the user save area, and that PCB_USER_FPU(curpcb) holds. The flags argument is not used. Always returns 0. */ int 11716cf9a08dSKonstantin Belousov fpu_kern_thread(u_int flags) 11726cf9a08dSKonstantin Belousov { 11736cf9a08dSKonstantin Belousov 11746cf9a08dSKonstantin Belousov KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, 11756cf9a08dSKonstantin Belousov ("Only kthread may use fpu_kern_thread")); 11761965c139SKonstantin Belousov KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), 11778c6f8f3dSKonstantin Belousov ("mangled pcb_save")); 11781965c139SKonstantin Belousov KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); 11796cf9a08dSKonstantin Belousov 11801965c139SKonstantin Belousov set_pcb_flags(curpcb, PCB_KERNFPU); 11816cf9a08dSKonstantin Belousov return (0); 11826cf9a08dSKonstantin Belousov } 11836cf9a08dSKonstantin Belousov 11846cf9a08dSKonstantin Belousov /* is_fpu_kern_thread: return non-zero when curthread has TDP_KTHREAD set and curpcb has PCB_KERNFPU set, 0 otherwise. The flags argument is not used. */ int 11856cf9a08dSKonstantin Belousov is_fpu_kern_thread(u_int flags) 11866cf9a08dSKonstantin Belousov { 11876cf9a08dSKonstantin Belousov 11886cf9a08dSKonstantin Belousov if ((curthread->td_pflags & TDP_KTHREAD) == 0) 11896cf9a08dSKonstantin Belousov return (0); 119083b22b05SKonstantin Belousov return ((curpcb->pcb_flags & PCB_KERNFPU) != 0); 11916cf9a08dSKonstantin Belousov } 11922741efecSPeter Grehan 11932741efecSPeter Grehan /* 11942741efecSPeter Grehan * FPU save area alloc/free/init utility routines 11952741efecSPeter Grehan */ 11962741efecSPeter Grehan /* fpu_save_area_alloc: allocate a save area from fpu_save_area_zone with uma_zalloc() and allocation flags 0; the result is returned unchecked. */ struct savefpu * 11972741efecSPeter Grehan fpu_save_area_alloc(void) 11982741efecSPeter Grehan { 11992741efecSPeter Grehan 12002741efecSPeter
Grehan return (uma_zalloc(fpu_save_area_zone, 0)); 12012741efecSPeter Grehan } 12022741efecSPeter Grehan 12032741efecSPeter Grehan /* fpu_save_area_free: hand the save area fsa back to fpu_save_area_zone via uma_zfree(). */ void 12042741efecSPeter Grehan fpu_save_area_free(struct savefpu *fsa) 12052741efecSPeter Grehan { 12062741efecSPeter Grehan 12072741efecSPeter Grehan uma_zfree(fpu_save_area_zone, fsa); 12082741efecSPeter Grehan } 12092741efecSPeter Grehan 12102741efecSPeter Grehan /* fpu_save_area_reset: overwrite fsa with the first cpu_max_ext_state_size bytes of fpu_initialstate. NOTE(review): fsa must be at least that large; presumably it comes from fpu_save_area_alloc() - confirm against callers. */ void 12112741efecSPeter Grehan fpu_save_area_reset(struct savefpu *fsa) 12122741efecSPeter Grehan { 12132741efecSPeter Grehan 12142741efecSPeter Grehan bcopy(fpu_initialstate, fsa, cpu_max_ext_state_size); 12152741efecSPeter Grehan } 1216