/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2021 Joyent, Inc.
 * Copyright 2021 RackTop Systems, Inc.
 * Copyright 2022 Oxide Computer Company
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
/*	All Rights Reserved	*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
/*	All Rights Reserved	*/

/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/signal.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/trap.h>
#include <sys/fault.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/pcb.h>
#include <sys/lwp.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/disp.h>
#include <sys/fp.h>
#include <sys/siginfo.h>
#include <sys/archsystm.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/x86_archext.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/kfpu.h>

/*
 * FPU Management Overview
 * -----------------------
 *
 * The x86 FPU has evolved substantially since its days as the x87 coprocessor;
 * however, many aspects of its life as a coprocessor are still around in x86.
 *
 * Today, when we refer to the 'FPU', we don't just mean the original x87 FPU.
 * While that state still exists, there is much more that is covered by the FPU.
 * Today, this includes not just traditional FPU state, but also supervisor-only
 * state. The following state is currently managed and covered logically by the
 * idea of the FPU registers:
 *
 *	o Traditional x87 FPU
 *	o Vector Registers (%xmm, %ymm, %zmm)
 *	o Memory Protection Extensions (MPX) Bounds Registers
 *	o Protected Key Rights Registers (PKRU)
 *	o Processor Trace data
 *
 * The rest of this covers how the FPU is managed and controlled, how state is
 * saved and restored between threads, interactions with hypervisors, and other
 * information exported to user land through aux vectors. A lot of background
 * information is here to synthesize major parts of the Intel SDM, but
 * unfortunately, it is not a replacement for reading it.
 *
 * FPU Control Registers
 * ---------------------
 *
 * Because the x87 FPU began its life as a co-processor and the FPU was
 * optional, there are several bits that show up in %cr0 that we have to
 * manipulate when dealing with the FPU. These are:
 *
 * o CR0.ET	The 'extension type' bit.
 *		This was used originally to indicate that the FPU co-processor
 *		was present. Now it is forced on for compatibility. This is
 *		often used to verify whether or not the FPU is present.
 *
 * o CR0.NE	The 'native error' bit. Used to indicate that native error
 *		mode should be enabled. This indicates that we should take
 *		traps on FPU errors. The OS enables this early in boot.
 *
 * o CR0.MP	The 'Monitor Coprocessor' bit. Used to control whether or not
 *		wait/fwait instructions generate a #NM if CR0.TS is set.
 *
 * o CR0.EM	The 'Emulation' bit. This is used to cause floating point
 *		operations (x87 through SSE4) to trap with a #UD so they can
 *		be emulated. The system never sets this bit, but makes sure it
 *		is clear on processor start up.
 *
 * o CR0.TS	The 'Task Switched' bit. When this is turned on, a floating
 *		point operation will generate a #NM. An fwait will as well,
 *		depending on the value in CR0.MP.
 *
 * Our general policy is that CR0.ET, CR0.NE, and CR0.MP are always set by
 * the system. Similarly CR0.EM is always unset by the system. CR0.TS has a
 * more complicated role. Historically it has been used to allow running
 * systems to restore the FPU registers lazily. This will be discussed in
 * greater depth later on.
 *
 * %cr4 is also used as part of the FPU control. Specifically we need to worry
 * about the following bits in the system:
 *
 * o CR4.OSFXSR		This bit is used to indicate that the OS understands
 *			and supports the execution of the fxsave and fxrstor
 *			instructions. This bit is required to be set to enable
 *			the use of the SSE->SSE4 instructions.
 *
 * o CR4.OSXMMEXCPT	This bit is used to indicate that the OS can understand
 *			and take a SIMD floating point exception (#XM). This
 *			bit is always enabled by the system.
 *
 * o CR4.OSXSAVE	This bit is used to indicate that the OS understands
 *			and supports the execution of the xsave and xrstor
 *			family of instructions. This bit is required to use
 *			any of the AVX and newer feature sets.
 *
 * Because all supported processors are 64-bit, they'll always support the XMM
 * extensions and we will enable both CR4.OSFXSR and CR4.OSXMMEXCPT in boot.
 * CR4.OSXSAVE will be enabled and used whenever xsave is reported in cpuid.
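 *
 * To make that boot-time policy concrete, a minimal sketch (using the bit
 * names from sys/controlregs.h; the real logic lives in the early startup
 * path and fpu_probe(), which also validates feature support first):
 *
 *	ulong_t cr0 = getcr0();
 *	cr0 |= CR0_ET | CR0_NE | CR0_MP;
 *	cr0 &= ~(CR0_EM | CR0_TS);
 *	setcr0(cr0);
 *	ulong_t cr4 = getcr4() | CR4_OSFXSR | CR4_OSXMMEXCPT;
 *	if (is_x86_feature(x86_featureset, X86FSET_XSAVE))
 *		cr4 |= CR4_OSXSAVE;
 *	setcr4(cr4);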
 *
 * %xcr0 is used to manage the behavior of the xsave feature set and is only
 * present on the system if xsave is supported. %xcr0 is read and written via
 * the xgetbv and xsetbv instructions. Each bit in %xcr0 refers to a different
 * component of the xsave state and controls whether or not that information
 * is saved and restored. For newer feature sets like AVX and MPX, it also
 * controls whether or not the corresponding instructions can be executed
 * (much like CR4.OSFXSR does for the SSE feature sets).
 *
 * Everything in %xcr0 concerns features available to user land. There is also
 * the IA32_XSS MSR which is used to control supervisor-only features that are
 * still part of the xsave state. Bits that can be set in %xcr0 are reserved in
 * IA32_XSS and vice versa. This is an important property that is particularly
 * relevant to how the xsave instructions operate.
 *
 * Save Mechanisms
 * ---------------
 *
 * When switching between running threads the FPU state needs to be saved and
 * restored by the OS. If this state was not saved, users would rightfully
 * complain about corrupt state. There are three mechanisms that exist on the
 * processor for saving and restoring these state images:
 *
 *	o fsave
 *	o fxsave
 *	o xsave
 *
 * fsave saves and restores only the x87 FPU and is the oldest of these
 * mechanisms. This mechanism is never used in the kernel today because we are
 * always running on systems that support fxsave.
 *
 * The fxsave and fxrstor mechanism allows the x87 FPU and the SSE register
 * state to be saved and restored to and from a struct fxsave_state. This is
 * the default mechanism that is used to save and restore the FPU on amd64. An
 * important aspect of fxsave that was different from the original i386 fsave
 * mechanism is that restoring FPU state with pending exceptions will not
 * generate an exception; it will be deferred to the next use of the FPU.
 *
 * The final and by far the most complex mechanism is that of the xsave set.
 * xsave allows for saving and restoring all of the traditional x86 pieces (x87
 * and SSE), while allowing for extensions that will save the %ymm, %zmm, etc.
 * registers.
 *
 * Data is saved and restored into and out of a struct xsave_state. The first
 * part of the struct xsave_state is equivalent to the struct fxsave_state.
 * After that, there is a header which is used to describe the remaining
 * portions of the state. The header is a 64-byte value of which the first two
 * uint64_t values are defined and the rest are reserved and must be zero. The
 * first uint64_t is the xstate_bv member. This describes which values in the
 * xsave_state are actually valid and present. This is updated on a save and
 * used on restore. The second member is the xcomp_bv member. Its last bit
 * determines whether or not a compressed version of the structure is used.
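 *
 * Pictorially, the layout looks roughly like the following sketch (see the
 * real definitions in sys/fp.h; only the member names used elsewhere in this
 * file are guaranteed accurate here):
 *
 *	struct xsave_state {
 *		struct fxsave_state xs_fxsave;	legacy x87/SSE region
 *		struct xsave_header xs_header;	xsh_xstate_bv, xsh_xcomp_bv,
 *						then 48 reserved bytes
 *		... extended components (AVX, etc.) follow ...
 *	};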
 *
 * When the uncompressed structure is used (currently the only format we
 * support), then each state component is at a fixed offset in the structure,
 * even if it is not being used. For example, if you only saved the AVX related
 * state, but did not save the MPX related state, the offset would not change
 * for any component. With the compressed format, components that aren't used
 * are all elided (though the x87 and SSE state are always there).
 *
 * Unlike fxsave which saves all state, the xsave family does not always save
 * and restore all the state that could be covered by the xsave_state. The
 * instructions all take an argument which is a mask of what to consider. This
 * is the same mask that will be used in the xstate_bv vector and it is also
 * the same set of values that are present in %xcr0 and IA32_XSS, though
 * IA32_XSS is only considered by the xsaves and xrstors instructions.
 *
 * When a save or restore is requested, a bitwise and is performed between the
 * requested bits and those that have been enabled in %xcr0. Only the bits
 * that match are then saved or restored. Others will be silently ignored by
 * the processor. This idea is used often in the OS. We will always request
 * that we save and restore all of the state, but only those portions that are
 * actually enabled in %xcr0 will be touched.
 *
 * If a feature has been asked to be restored that is not set in the xstate_bv
 * feature vector of the save state, then it will be set to its initial state
 * by the processor (usually zeros). Also, when asked to save state, the
 * processor may not write out data that is in its initial state as an
 * optimization. This optimization only applies to saving data and not to
 * restoring data.
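 *
 * Concretely, the kernel always requests everything and lets the hardware
 * mask it (a sketch; xs points at the thread's struct xsave_state):
 *
 *	xsavep(xs, fp->fpu_xsave_mask);
 *
 * Only the components present in (fp->fpu_xsave_mask & %xcr0) are written,
 * and xsh_xstate_bv in the header records which components actually hold
 * valid, non-initial data.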
 *
 * There are a few different variants of the xsave and xrstor instructions.
 * They are:
 *
 * o xsave	This is the original save instruction. It will save all of the
 *		requested data in the xsave state structure. It only saves
 *		data in the uncompressed (xcomp_bv[63] is zero) format. It may
 *		be executed at all privilege levels.
 *
 * o xrstor	This is the original restore instruction. It will restore all
 *		of the requested data. The xrstor instruction can handle both
 *		the compressed and uncompressed formats. It may be executed at
 *		all privilege levels.
 *
 * o xsaveopt	This is a variant of the xsave instruction that employs
 *		optimizations to try and only write out state that has been
 *		modified since the last time an xrstor instruction was called.
 *		The processor tracks a tuple of information about the last
 *		xrstor and tries to ensure that the same buffer is being used
 *		when this optimization is being used. However, because it
 *		tracks the xrstor buffer by its address, it is not suitable
 *		for use if that buffer can be easily reused. The most common
 *		case is trying to save data to the stack in rtld. It may be
 *		executed at all privilege levels.
 *
 * o xsavec	This is a variant of the xsave instruction that writes out the
 *		compressed form of the xsave_state. Otherwise it behaves as
 *		xsave. It may be executed at all privilege levels.
 *
 * o xsaves	This is a variant of the xsave instruction. It is similar to
 *		xsavec in that it always writes the compressed form of the
 *		buffer. Unlike all the other forms, this instruction looks at
 *		both the user (%xcr0) and supervisor (IA32_XSS MSR) masks to
 *		determine what to save and restore. xsaves also implements the
 *		same optimization that xsaveopt does around modified pieces.
 *		User land may not execute the instruction.
 *
 * o xrstors	This is a variant of the xrstor instruction. Similar to xsaves
 *		it can save and restore both the user and privileged states.
 *		Unlike xrstor it can only operate on the compressed form.
 *		User land may not execute the instruction.
 *
 * Based on all of these, the kernel has a precedence for what it will use.
 * Basically, xsaves (not currently supported) is preferred to xsaveopt, which
 * is preferred to xsave. A similar scheme is used when informing rtld (more
 * later) about what it should use: xsavec is preferred to xsave, and xsaveopt
 * is not recommended there because its modified-state optimization is not
 * appropriate for that use.
 *
 * Finally, there is one last gotcha with the xsave state. Importantly some AMD
 * processors did not always save and restore some of the FPU exception state
 * in some cases like Intel did. In those cases the OS will make up for this
 * fact itself.
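 *
 * For illustration, the mechanism selection reduces to a sketch like this
 * (the real decision is made in fpu_probe(), with more validation than is
 * shown here):
 *
 *	if (is_x86_feature(x86_featureset, X86FSET_XSAVE))
 *		fp_save_mech = FP_XSAVE;
 *	else
 *		fp_save_mech = FP_FXSAVE;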
 *
 * FPU Initialization
 * ------------------
 *
 * One difference with the FPU registers is that not all threads have FPU
 * state; only those that have an lwp do. Generally this means kernel threads,
 * which all share p0 and its lwp, do not have FPU state, though there are
 * definitely exceptions such as kcfpoold. In the rest of this discussion
 * we'll use thread and lwp interchangeably; just think of thread as meaning a
 * thread that has an lwp.
 *
 * Each lwp has its FPU state allocated in its pcb (process control block).
 * The actual storage comes from the fpsave_cachep kmem cache. This cache is
 * sized dynamically at start up based on the save mechanism that we're using
 * and the amount of memory required for it. This is dynamic because the
 * xsave_state size varies based on the supported feature set.
 *
 * The hardware side of the FPU is initialized early in boot before we mount
 * the root file system. This is effectively done in fpu_probe(). This is
 * where we make the final decision about which save and restore mechanisms we
 * should use, create the fpsave_cachep kmem cache, and initialize a number of
 * function pointers that implement the save and restore logic.
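 *
 * As a sketch, that cache creation amounts to something like the following
 * (the cache name and exact arguments are illustrative, not the literal
 * fpu_probe() code):
 *
 *	size_t sz = (fp_save_mech == FP_XSAVE) ?
 *	    cpuid_get_xsave_size() : sizeof (struct fxsave_state);
 *	fpsave_cachep = kmem_cache_create("fpsave_cache", sz, 64,
 *	    NULL, NULL, NULL, NULL, NULL, 0);
 *
 * where the 64 reflects the xsave area's alignment requirement.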
 *
 * The thread/lwp side is a little more involved. There are two different
 * things that we need to concern ourselves with. The first is how the FPU
 * resources are allocated and the second is how the FPU state is initialized
 * for a given lwp.
 *
 * We allocate the FPU save state from our kmem cache as part of lwp_fp_init().
 * This is always called unconditionally by the system as part of creating an
 * LWP.
 *
 * There are three different initialization paths that we deal with. The first
 * is when we are executing a new process. As part of exec all of the register
 * state is reset. The exec case is particularly important because init is born
 * like Athena, sprouting from the head of the kernel, without any true parent
 * to fork from. The second is used whenever we fork or create a new lwp. The
 * third is to deal with special lwps like the agent lwp.
 *
 * During exec, we will call fp_exec() which will initialize and set up the FPU
 * state for the process. That will fill in the initial state for the FPU and
 * also set that state in the FPU itself. As part of fp_exec() we also install
 * a thread context operations vector that takes care of dealing with the
 * saving and restoring of the FPU. These context handlers will also be called
 * whenever an lwp is created or forked. In those cases, to initialize the FPU
 * we will call fp_new_lwp(). Like fp_exec(), fp_new_lwp() will install a
 * context operations vector for the new thread.
 *
 * Next we'll end up in the context operation fp_new_lwp(). This saves the
 * current thread's state, initializes the new thread's state, and copies over
 * the relevant parts of the originating thread's state. It's at this point
 * that we also install the FPU context operations into the new thread, which
 * ensures that all future threads that are descendants of the current one get
 * the thread context operations (unless they call exec).
 *
 * To deal with some things like the agent lwp, we double check the state of
 * the FPU in sys_rtt_common() to make sure that it has been enabled before
 * returning to user land. In general, this path should be rare, but it's
 * useful for the odd lwp here and there.
 *
 * The FPU state will remain valid most of the time. There are times that
 * the state will be rewritten. For example in restorecontext, due to /proc, or
 * when the lwp calls exec(). Whether the context is being freed or we are
 * resetting the state, we will call fp_free() to disable the FPU and our
 * context.
 *
 * Finally, when the lwp is destroyed, it will actually destroy and free the
 * FPU state by calling fp_lwp_cleanup().
 *
 * Kernel FPU Multiplexing
 * -----------------------
 *
 * Just as the kernel has to maintain all of the general purpose registers when
 * switching between scheduled threads, the same is true of the FPU registers.
 *
 * When a thread has FPU state, it also has a set of context operations
 * installed. These context operations take care of making sure that the FPU
 * is properly saved and restored during a context switch (fpsave_ctxt and
 * fprestore_ctxt respectively). This means that the current implementation of
 * the FPU is 'eager': when a thread is running, the CPU will have its FPU
 * state loaded. While this is always true when executing in userland, there
 * are a few cases where this is not true in the kernel.
 *
 * This was not always the case. Traditionally on x86 a 'lazy' FPU restore was
 * employed. This meant that the FPU would be saved on a context switch and the
 * CR0.TS bit would be set. When a thread next tried to use the FPU, it would
 * then take a #NM trap, at which point we would restore the FPU from the save
 * area and return to user land. Given the frequency of use of the FPU alone
 * by libc, there's no point returning to user land just to trap again.
 *
 * There are a few cases though where the FPU state may need to be changed for
 * a thread on its behalf. The most notable cases are in the case of processes
 * using /proc, restorecontext, forking, etc. In all of these cases the kernel
 * will force a thread's FPU state to be saved into the PCB through the
 * fp_save() function. Whenever the FPU is saved, the FPU_VALID flag is set on
 * the pcb. This indicates that the save state holds currently valid data. As
 * a side effect of this, CR0.TS will be set. To make sure that all of the
 * state is updated before returning to user land, in these cases, we set a
 * flag on the PCB that says the FPU needs to be updated. This will make sure
 * that we take the slow path out of a system call to fix things up for the
 * thread. Because this is a rather rare case, effectively setting the
 * equivalent of t_postsys is acceptable.
 *
 * CR0.TS will be set after a save occurs and cleared when a restore occurs.
 * Generally this means it will be cleared immediately by the new thread that
 * is running in a context switch. However, this isn't the case for kernel
 * threads. They currently operate with CR0.TS set as no kernel state is
 * restored for them. This means that using the FPU will cause a #NM and
 * panic.
 *
 * The FPU_VALID flag on the currently executing thread's pcb is meant to
 * track what the value of CR0.TS should be. If it is set, then CR0.TS will be
 * set. However, because we eagerly restore, the only time that CR0.TS should
 * be set for a non-kernel thread is during operations where it will be
 * cleared before returning to user land and, importantly, the only data that
 * is in it is its own.
 *
 * Kernel FPU Usage
 * ----------------
 *
 * Traditionally the kernel never used the FPU since it had no need for
 * floating point operations. However, modern FPU hardware supports a variety
 * of SIMD extensions which can speed up code such as parity calculations or
 * encryption.
 *
 * To allow the kernel to take advantage of these features, the
 * kernel_fpu_begin() and kernel_fpu_end() functions should be wrapped
 * around any usage of the FPU by the kernel to ensure that user-level context
 * is properly saved/restored, as well as to properly set up the FPU for use
 * by the kernel. There are a variety of ways this wrapping can be used, as
 * discussed in this section below.
 *
 * When kernel_fpu_begin() and kernel_fpu_end() are used for extended
 * operations, the kernel_fpu_alloc() function should be used to allocate a
 * kfpu_state_t structure that is used to save/restore the thread's kernel FPU
 * state. This structure is not tied to any thread. That is, different threads
 * can reuse the same kfpu_state_t structure, although not concurrently. A
 * kfpu_state_t structure is freed by the kernel_fpu_free() function.
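 *
 * A sketch of that extended-operation pattern (error handling elided, and
 * assuming no additional flags are needed for this state-backed form; see
 * sys/kfpu.h for the authoritative interface):
 *
 *	kfpu_state_t *kfs = kernel_fpu_alloc(KM_SLEEP);
 *	kernel_fpu_begin(kfs, 0);
 *	... long-running SIMD work; the thread may be context switched ...
 *	kernel_fpu_end(kfs, 0);
 *	kernel_fpu_free(kfs);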
 *
 * In some cases, the kernel may need to use the FPU for a short operation
 * without the overhead to manage a kfpu_state_t structure and without
 * allowing for a context switch off the FPU. In this case the KFPU_NO_STATE
 * bit can be set in the kernel_fpu_begin() and kernel_fpu_end() flags
 * parameter. This indicates that there is no kfpu_state_t. When used this
 * way, kernel preemption should be disabled by the caller (kpreempt_disable)
 * before calling kernel_fpu_begin(), and re-enabled after calling
 * kernel_fpu_end(). For this usage, it is important to limit the kernel's FPU
 * use to short operations. The tradeoff between using the FPU without a
 * kfpu_state_t structure vs. the overhead of allowing a context switch while
 * using the FPU should be carefully considered on a case by case basis.
 *
 * In other cases, kernel threads have an LWP, but never execute in user
 * space. In this situation, the LWP's pcb_fpu area can be used to
 * save/restore the kernel's FPU state if the thread is context switched,
 * instead of having to allocate and manage a kfpu_state_t structure. The
 * KFPU_USE_LWP bit in the kernel_fpu_begin() and kernel_fpu_end() flags
 * parameter is used to enable this behavior. It is the caller's
 * responsibility to ensure that this is only used for a kernel thread which
 * never executes in user space.
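 *
 * For example, the short-operation (KFPU_NO_STATE) form described above
 * looks like this sketch (assuming NULL is passed for the absent
 * kfpu_state_t):
 *
 *	kpreempt_disable();
 *	kernel_fpu_begin(NULL, KFPU_NO_STATE);
 *	... a short SIMD sequence; must not block or be preempted ...
 *	kernel_fpu_end(NULL, KFPU_NO_STATE);
 *	kpreempt_enable();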
 *
 * FPU Exceptions
 * --------------
 *
 * Certain operations can cause the kernel to take traps due to FPU activity.
 * Generally these events will cause a user process to receive a SIGFPE; if
 * such a trap occurs in kernel context, we will die. Traditionally the #NM
 * (Device Not Available / No Math) exception generated by CR0.TS would have
 * caused us to restore the FPU. Now it is a fatal event regardless of whether
 * or not user land causes it.
 *
 * While there are some cases where the kernel uses the FPU, it is up to the
 * kernel to use the FPU in a way such that it cannot receive a trap or to use
 * the appropriate trap protection mechanisms.
 *
 * Hypervisors
 * -----------
 *
 * When providing support for hypervisors things are a little bit more
 * complicated because the FPU is not virtualized at all. This means that they
 * need to save and restore the FPU and %xcr0 across entry and exit to the
 * guest. To facilitate this, we provide a series of APIs in <sys/hma.h>.
 * These allow us to use the full native state to make sure that we are always
 * saving and restoring the full FPU that the host sees, even when the guest
 * is using a subset.
 *
 * One tricky aspect of this is that the guest may be using a subset of %xcr0
 * and therefore changing our %xcr0 on the fly. It is vital that when we're
 * saving and restoring the FPU that we always use the largest %xcr0 contents;
 * otherwise we will end up leaving behind data in it.
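 *
 * As a sketch of how a hypervisor consumer drives this (assuming the
 * hma_fpu_alloc()/hma_fpu_start_guest() family from <sys/hma.h>; treat the
 * exact names and error handling as illustrative):
 *
 *	hma_fpu_t *hf = hma_fpu_alloc(KM_SLEEP);
 *	hma_fpu_init(hf);
 *	...
 *	hma_fpu_start_guest(hf);	host FPU saved, guest state loaded
 *	... run the guest ...
 *	hma_fpu_stop_guest(hf);		guest state saved, host FPU restored
 *	...
 *	hma_fpu_free(hf);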
 *
 * ELF PLT Support
 * ---------------
 *
 * rtld has to preserve a subset of the FPU when it is saving and restoring
 * registers due to the amd64 SYS V ABI. See cmd/sgs/rtld/amd64/boot_elf.s for
 * more information. As a result, we set up an aux vector that contains
 * information about what save and restore mechanisms it should be using and
 * the sizing thereof based on what the kernel supports. This is passed down
 * in a series of aux vectors SUN_AT_FPTYPE and SUN_AT_FPSIZE. This
 * information is initialized in fpu_subr.c.
 */

kmem_cache_t *fpsave_cachep;

/* Legacy fxsave layout + xsave header + ymm */
#define	AVX_XSAVE_SIZE		(512 + 64 + 256)

/*
 * Various sanity checks.
 */
CTASSERT(sizeof (struct fxsave_state) == 512);
CTASSERT(sizeof (struct fnsave_state) == 108);
CTASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0);
CTASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE);

/*
 * This structure is the x86 implementation of the kernel FPU that is defined
 * in uts/common/sys/kfpu.h.
 */

typedef enum kfpu_flags {
	/*
	 * This indicates that the save state has initial FPU data.
	 */
	KFPU_F_INITIALIZED = 0x01
} kfpu_flags_t;

struct kfpu_state {
	fpu_ctx_t	kfpu_ctx;
	kfpu_flags_t	kfpu_flags;
	kthread_t	*kfpu_curthread;
};

/*
 * Initial kfpu state for SSE/SSE2 used by fpinit()
 */
const struct fxsave_state sse_initial = {
	FPU_CW_INIT,	/* fx_fcw */
	0,		/* fx_fsw */
	0,		/* fx_fctw */
	0,		/* fx_fop */
	0,		/* fx_rip */
	0,		/* fx_rdp */
	SSE_MXCSR_INIT	/* fx_mxcsr */
	/* rest of structure is zero */
};

/*
 * Initial kfpu state for AVX used by fpinit()
 */
const struct xsave_state avx_initial = {
	/*
	 * The definition below needs to be identical with sse_initial
	 * defined above.
	 */
	.xs_fxsave = {
		.fx_fcw = FPU_CW_INIT,
		.fx_mxcsr = SSE_MXCSR_INIT,
	},
	.xs_header = {
		/*
		 * bit0 = 1 for XSTATE_BV to indicate that legacy fields are
		 * valid, and CPU should initialize XMM/YMM.
		 */
		.xsh_xstate_bv = 1,
		.xsh_xcomp_bv = 0,
	},
};

/*
 * mxcsr_mask value (possibly reset in fpu_probe); used to avoid
 * the #gp exception caused by setting unsupported bits in the
 * MXCSR register
 */
uint32_t sse_mxcsr_mask = SSE_MXCSR_MASK_DEFAULT;

/*
 * Initial kfpu state for x87 used by fpinit()
 */
const struct fnsave_state x87_initial = {
	FPU_CW_INIT,	/* f_fcw */
	0,		/* __f_ign0 */
	0,		/* f_fsw */
	0,		/* __f_ign1 */
	0xffff,		/* f_ftw */
	/* rest of structure is zero */
};

/*
 * This vector is patched to xsave_ctxt() or xsaveopt_ctxt() if we discover we
 * have an XSAVE-capable chip in fpu_probe.
 */
void (*fpsave_ctxt)(void *) = fpxsave_ctxt;
void (*fprestore_ctxt)(void *) = fpxrestore_ctxt;

/*
 * This function pointer is changed to xsaveopt if the CPU is xsaveopt capable.
 */
void (*xsavep)(struct xsave_state *, uint64_t) = xsave;
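
/*
 * For illustration, when fpu_probe() finds an XSAVE-capable chip it repoints
 * the vectors above, roughly as follows (a sketch; xsaveopt_capable is an
 * illustrative stand-in for the cpuid feature check, and the restore-side
 * vector is handled similarly):
 *
 *	fpsave_ctxt = xsaveopt_capable ? xsaveopt_ctxt : xsave_ctxt;
 *	if (xsaveopt_capable)
 *		xsavep = xsaveopt;
 */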

static int fpe_sicode(uint_t);
static int fpe_simd_sicode(uint_t);
static void fp_new_lwp(void *, void *);
static void fp_free_ctx(void *, int);

static struct ctxop *
fp_ctxop_allocate(struct fpu_ctx *fp)
{
	const struct ctxop_template tpl = {
		.ct_rev		= CTXOP_TPL_REV,
		.ct_save	= fpsave_ctxt,
		.ct_restore	= fprestore_ctxt,
		.ct_fork	= fp_new_lwp,
		.ct_lwp_create	= fp_new_lwp,
		.ct_free	= fp_free_ctx,
	};
	return (ctxop_allocate(&tpl, fp));
}

/*
 * Copy the state of parent lwp's floating point context into the new lwp.
 * Invoked for both fork() and lwp_create().
 *
 * Note that we inherit -only- the control state (e.g. exception masks,
 * rounding, precision control, etc.); the FPU registers are otherwise
 * reset to their initial state.
 */
static void
fp_new_lwp(void *parent, void *child)
{
	kthread_id_t t = parent, ct = child;
	struct fpu_ctx *fp;		/* parent fpu context */
	struct fpu_ctx *cfp;		/* new fpu context */
	struct fxsave_state *fx, *cfx;
	struct xsave_state *cxs;

	ASSERT(fp_kind != FP_NO);

	fp = &t->t_lwp->lwp_pcb.pcb_fpu;
	cfp = &ct->t_lwp->lwp_pcb.pcb_fpu;

	/*
	 * If the parent FPU state is still in the FPU hw then save it;
	 * conveniently, fp_save() already does this for us nicely.
	 */
	fp_save(fp);

	cfp->fpu_flags = FPU_EN | FPU_VALID;
	cfp->fpu_regs.kfpu_status = 0;
	cfp->fpu_regs.kfpu_xstatus = 0;

	/*
	 * Make sure that the child's FPU is cleaned up and made ready for
	 * user land.
	 */
	PCB_SET_UPDATE_FPU(&ct->t_lwp->lwp_pcb);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		fx = fp->fpu_regs.kfpu_u.kfpu_fx;
		cfx = cfp->fpu_regs.kfpu_u.kfpu_fx;
		bcopy(&sse_initial, cfx, sizeof (*cfx));
		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
		cfx->fx_fcw = fx->fx_fcw;
		break;

	case FP_XSAVE:
		cfp->fpu_xsave_mask = fp->fpu_xsave_mask;

		VERIFY(fp->fpu_regs.kfpu_u.kfpu_xs != NULL);

		fx = &fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave;
		cxs = cfp->fpu_regs.kfpu_u.kfpu_xs;
		cfx = &cxs->xs_fxsave;

		bcopy(&avx_initial, cxs, sizeof (*cxs));
		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
		cfx->fx_fcw = fx->fx_fcw;
		cxs->xs_header.xsh_xstate_bv |=
		    (get_xcr(XFEATURE_ENABLED_MASK) & XFEATURE_FP_INITIAL);
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}

	/*
	 * Mark that both the parent and child need to have the FPU cleaned up
	 * before returning to user land.
	 */

	ctxop_attach(ct, fp_ctxop_allocate(cfp));
}

/*
 * Free any state associated with floating point context.
 * fp_free() can be called in three cases:
 *
 * 1) from reaper -> thread_free -> freectx -> fp_free
 *	fp context belongs to a thread on deathrow
 *	nothing to do, thread will never be resumed
 *	thread calling ctxfree is reaper
 *
 * 2) from exec -> freectx -> fp_free
 *	fp context belongs to the current thread
 *	must disable fpu, thread calling ctxfree is curthread
 *
 * 3) from restorecontext -> setfpregs -> fp_free
 *	we have a modified context in the memory (lwp->pcb_fpu)
 *	disable fpu and release the fp context for the CPU
 */
void
fp_free(struct fpu_ctx *fp)
{
	ASSERT(fp_kind != FP_NO);

	if (fp->fpu_flags & FPU_VALID)
		return;

	kpreempt_disable();
	/*
	 * We want to do fpsave rather than fpdisable so that we can
	 * keep the fpu_flags as FPU_VALID tracking the CR0_TS bit
	 */
	fp->fpu_flags |= FPU_VALID;
	/* If this is the current thread, disable FP to track FPU_VALID */
	if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) {
		/* Clear errors if any to prevent frstor from complaining */
		(void) fperr_reset();
		if (fp_kind & __FP_SSE)
			(void) fpxerr_reset();
		fpdisable();
	}
	kpreempt_enable();
}

/*
 * Wrapper for freectx to make the types line up for fp_free()
 */
static void
fp_free_ctx(void *arg, int isexec __unused)
{
	fp_free((struct fpu_ctx *)arg);
}

/*
 * Store the floating point state and disable the floating point unit.
 */
void
fp_save(struct fpu_ctx *fp)
{
	ASSERT(fp_kind != FP_NO);

	kpreempt_disable();
	if (!fp || fp->fpu_flags & FPU_VALID ||
	    (fp->fpu_flags & FPU_EN) == 0) {
		kpreempt_enable();
		return;
	}
	ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		fpxsave(fp->fpu_regs.kfpu_u.kfpu_fx);
		break;

	case FP_XSAVE:
		xsavep(fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}

	fp->fpu_flags |= FPU_VALID;

	/*
	 * We save the FPU as part of forking, execing, modifications via
	 * /proc, restorecontext, etc. As such, we need to make sure that we
	 * return to userland with valid state in the FPU. If we're context
	 * switched out before we hit sys_rtt_common() we'll end up having
	 * restored the FPU as part of the context ops operations. The restore
	 * logic always makes sure that FPU_VALID is set before doing a
	 * restore so we don't restore it a second time.
	 */
	PCB_SET_UPDATE_FPU(&curthread->t_lwp->lwp_pcb);

	kpreempt_enable();
}

/*
 * Restore the FPU context for the thread:
 * The possibilities are:
 *	1. No active FPU context: Load the new context into the FPU hw
 *	   and enable the FPU.
 */
void
fp_restore(struct fpu_ctx *fp)
{
	switch (fp_save_mech) {
	case FP_FXSAVE:
		fpxrestore(fp->fpu_regs.kfpu_u.kfpu_fx);
		break;

	case FP_XSAVE:
		xrestore(fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}

	fp->fpu_flags &= ~FPU_VALID;
}
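
/*
 * For illustration, a typical in-kernel consumer of the pair above (e.g. the
 * /proc register access paths) follows a pattern like this sketch:
 *
 *	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
 *
 *	fp_save(fp);		state now valid in the pcb; CR0.TS set
 *	... read or modify the pcb's saved image ...
 *	fp_restore(fp);		or let sys_rtt_common() handle the reload
 */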

/*
 * Reset the FPU such that it is in a valid state for a new thread that is
 * coming out of exec. The FPU will be in a usable state at this point: we
 * know that the FPU state has already been allocated, and if this wasn't an
 * init process, fp_free() will previously have been called.
 */
void
fp_exec(void)
{
	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

	if (fp_save_mech == FP_XSAVE) {
		fp->fpu_xsave_mask = XFEATURE_FP_ALL;
	}

	struct ctxop *ctx = fp_ctxop_allocate(fp);
	/*
	 * Make sure that we're not preempted in the middle of initializing
	 * the FPU on CPU.
	 */
	kpreempt_disable();
	ctxop_attach(curthread, ctx);
	fpinit();
	fp->fpu_flags = FPU_EN;
	kpreempt_enable();
}


/*
 * Seeds the initial state for the current thread. The possibilities are:
 *	1. Another process has modified the FPU state before we have done any
 *	   initialization: Load the FPU state from the LWP state.
 *	2. The FPU state has not been externally modified: Load a clean state.
 */
void
fp_seed(void)
{
	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

	ASSERT(curthread->t_preempt >= 1);
	ASSERT((fp->fpu_flags & FPU_EN) == 0);

	/*
	 * Always initialize a new context and initialize the hardware.
	 */
	if (fp_save_mech == FP_XSAVE) {
		fp->fpu_xsave_mask = XFEATURE_FP_ALL;
	}

	ctxop_attach(curthread, fp_ctxop_allocate(fp));
	fpinit();

	/*
	 * If FPU_VALID is set, it means someone has modified registers via
	 * /proc. In this case, restore the current lwp's state.
	 */
	if (fp->fpu_flags & FPU_VALID)
		fp_restore(fp);

	ASSERT((fp->fpu_flags & FPU_VALID) == 0);
	fp->fpu_flags = FPU_EN;
}

/*
 * When using xsave/xrstor, these three functions are used by the lwp code to
 * manage the memory for the xsave area.
 */
void
fp_lwp_init(struct _klwp *lwp)
{
	struct fpu_ctx *fp = &lwp->lwp_pcb.pcb_fpu;

	/*
	 * We keep a copy of the pointer in lwp_fpu so that we can restore the
	 * value in forklwp() after we duplicate the parent's LWP state.
	 */
	lwp->lwp_fpu = fp->fpu_regs.kfpu_u.kfpu_generic =
	    kmem_cache_alloc(fpsave_cachep, KM_SLEEP);

	if (fp_save_mech == FP_XSAVE) {
		/*
		 * We bzero since the fpinit() code path will only
		 * partially initialize the xsave area using avx_initial.
		 */
		ASSERT(cpuid_get_xsave_size() >= sizeof (struct xsave_state));
		bzero(fp->fpu_regs.kfpu_u.kfpu_xs, cpuid_get_xsave_size());
	}
}

void
fp_lwp_cleanup(struct _klwp *lwp)
{
	struct fpu_ctx *fp = &lwp->lwp_pcb.pcb_fpu;

	if (fp->fpu_regs.kfpu_u.kfpu_generic != NULL) {
		kmem_cache_free(fpsave_cachep,
		    fp->fpu_regs.kfpu_u.kfpu_generic);
		lwp->lwp_fpu = fp->fpu_regs.kfpu_u.kfpu_generic = NULL;
	}
}

/*
 * Called during the process of forklwp(). The kfpu_u pointer will have been
 * overwritten while copying the parent's LWP structure. We have a valid copy
 * stashed in the child's lwp_fpu which we use to restore the correct value.
 */
void
fp_lwp_dup(struct _klwp *lwp)
{
	void *xp = lwp->lwp_fpu;
	size_t sz;

	switch (fp_save_mech) {
	case FP_FXSAVE:
		sz = sizeof (struct fxsave_state);
		break;
	case FP_XSAVE:
		sz = cpuid_get_xsave_size();
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}

	/* copy the parent's values into the new lwp's struct */
	bcopy(lwp->lwp_pcb.pcb_fpu.fpu_regs.kfpu_u.kfpu_generic, xp, sz);
	/* now restore the pointer */
	lwp->lwp_pcb.pcb_fpu.fpu_regs.kfpu_u.kfpu_generic = xp;
}

/*
 * Handle a processor extension error fault
 * Returns non-zero for error.
/*
 * Handle a processor extension error fault.
 * Returns non-zero for error.
 */

/*ARGSUSED*/
int
fpexterrflt(struct regs *rp)
{
	uint32_t fpcw, fpsw;
	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

	ASSERT(fp_kind != FP_NO);

	/*
	 * Now we can enable the interrupts.
	 * (NOTE: x87 fp exceptions come through an interrupt gate)
	 */
	sti();

	if (!fpu_exists)
		return (FPE_FLTINV);

	/*
	 * Do an unconditional save of the FP state. If it's dirty (TS=0),
	 * it'll be saved into the fpu context area passed in (that of the
	 * current thread). If it's not dirty (it may not be, due to an
	 * intervening save caused by a context switch between the sti()
	 * above and here), then it's safe to just use the stored values in
	 * the context save area to determine the cause of the fault.
	 */
	fp_save(fp);

	/* clear exception flags in saved state, as if by fnclex */
	switch (fp_save_mech) {
	case FP_FXSAVE:
		fpsw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw;
		fpcw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fcw;
		fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw &= ~FPS_SW_EFLAGS;
		break;

	case FP_XSAVE:
		fpsw = fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fsw;
		fpcw = fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fcw;
		fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fsw &= ~FPS_SW_EFLAGS;
		/*
		 * Always set LEGACY_FP as it may have been cleared by the
		 * XSAVE instruction.
		 */
		fp->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |=
		    XFEATURE_LEGACY_FP;
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}

	fp->fpu_regs.kfpu_status = fpsw;

	if ((fpsw & FPS_ES) == 0)
		return (0);	/* No exception */

	/*
	 * "and" the exception flags with the complement of the mask
	 * bits to determine which exception occurred
	 */
	return (fpe_sicode(fpsw & ~fpcw & 0x3f));
}
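
/*
 * Illustrative worked example, not compiled into this file: the x87
 * exception flags in the status word and the exception mask bits in the
 * control word occupy the same bit positions (0-5), which is why
 * "fpsw & ~fpcw & 0x3f" yields exactly the pending, unmasked exceptions.
 * The values below are hypothetical.
 */
#if 0
static void
example_x87_sicode(void)
{
	uint32_t fpsw = FPS_ES | FPS_ZE;	/* summary + zero-divide set */
	uint32_t fpcw = 0x3f & ~FPS_ZE;		/* everything masked but ZE */

	/* Only the unmasked zero-divide survives, mapping to FPE_FLTDIV. */
	ASSERT3S(fpe_sicode(fpsw & ~fpcw & 0x3f), ==, FPE_FLTDIV);
}
#endif
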
/*
 * Handle an SSE/SSE2 precise exception.
 * Returns a non-zero sicode for error.
 */
/*ARGSUSED*/
int
fpsimderrflt(struct regs *rp)
{
	uint32_t mxcsr, xmask;
	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

	ASSERT(fp_kind & __FP_SSE);

	/*
	 * NOTE: Interrupts are disabled during execution of this
	 * function. They are enabled by the caller in trap.c.
	 */

	/*
	 * If there is no FP unit, the only way we could have gotten here is
	 * via a user executing an INT $19 instruction, so there is no fault
	 * in that case.
	 */
	if (!fpu_exists)
		return (0);

	/*
	 * Do an unconditional save of the FP state. If it's dirty (TS=0),
	 * it'll be saved into the fpu context area passed in (that of the
	 * current thread). If it's not dirty, then it's safe to just use
	 * the stored values in the context save area to determine the
	 * cause of the fault.
	 */
	fp_save(fp);		/* save the FPU state */

	if (fp_save_mech == FP_XSAVE) {
		mxcsr = fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_mxcsr;
		fp->fpu_regs.kfpu_status =
		    fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fsw;
	} else {
		mxcsr = fp->fpu_regs.kfpu_u.kfpu_fx->fx_mxcsr;
		fp->fpu_regs.kfpu_status = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw;
	}
	fp->fpu_regs.kfpu_xstatus = mxcsr;

	/*
	 * Compute the mask that determines which conditions can cause
	 * a #xm exception, and use this to clean the status bits so that
	 * we can identify the true cause of this one.
	 */
	xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS;
	return (fpe_simd_sicode((mxcsr & SSE_MXCSR_EFLAGS) & ~xmask));
}
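
/*
 * Illustrative worked example, not compiled into this file: MXCSR keeps the
 * SIMD exception flags in bits 0-5 and the corresponding mask bits in bits
 * 7-12, so shifting right by 7 lines the masks up over the flags. The
 * values below are hypothetical.
 */
#if 0
static void
example_simd_sicode(void)
{
	/* Zero-divide flag raised; every exception masked except it. */
	uint32_t mxcsr = SSE_ZE | ((SSE_MXCSR_EFLAGS & ~SSE_ZE) << 7);
	uint32_t xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS;

	ASSERT3S(fpe_simd_sicode((mxcsr & SSE_MXCSR_EFLAGS) & ~xmask), ==,
	    FPE_FLTDIV);
}
#endif
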
/*
 * In the unlikely event that someone is relying on this subcode being
 * FPE_FLTILL for denormalize exceptions, it can always be patched back
 * again to restore old behaviour.
 */
int fpe_fltden = FPE_FLTDEN;

/*
 * Map from the FPU status word to the FP exception si_code.
 */
static int
fpe_sicode(uint_t sw)
{
	if (sw & FPS_IE)
		return (FPE_FLTINV);
	if (sw & FPS_ZE)
		return (FPE_FLTDIV);
	if (sw & FPS_DE)
		return (fpe_fltden);
	if (sw & FPS_OE)
		return (FPE_FLTOVF);
	if (sw & FPS_UE)
		return (FPE_FLTUND);
	if (sw & FPS_PE)
		return (FPE_FLTRES);
	return (FPE_FLTINV);	/* default si_code for other exceptions */
}

/*
 * Map from the SSE status word to the FP exception si_code.
 */
static int
fpe_simd_sicode(uint_t sw)
{
	if (sw & SSE_IE)
		return (FPE_FLTINV);
	if (sw & SSE_ZE)
		return (FPE_FLTDIV);
	if (sw & SSE_DE)
		return (FPE_FLTDEN);
	if (sw & SSE_OE)
		return (FPE_FLTOVF);
	if (sw & SSE_UE)
		return (FPE_FLTUND);
	if (sw & SSE_PE)
		return (FPE_FLTRES);
	return (FPE_FLTINV);	/* default si_code for other exceptions */
}
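
/*
 * Illustrative sketch, not compiled into this file: the si_code values
 * produced by the two mappings above are what a user process observes in
 * its SIGFPE handler. The handler below is hypothetical user-level code.
 */
#if 0
#include <signal.h>
#include <stdio.h>

static void
sigfpe_handler(int sig, siginfo_t *sip, void *ucp)
{
	switch (sip->si_code) {
	case FPE_FLTDIV:
		(void) printf("FP divide by zero\n");
		break;
	case FPE_FLTOVF:
		(void) printf("FP overflow\n");
		break;
	case FPE_FLTUND:
		(void) printf("FP underflow\n");
		break;
	default:
		(void) printf("other FP exception, si_code %d\n",
		    sip->si_code);
		break;
	}
}
#endif
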
/*
 * This routine is invoked as part of libc's __fpstart implementation
 * via sysi86(2).
 *
 * It may be called -before- any context has been assigned, in which case
 * we try to avoid touching the hardware. Or it may be invoked well
 * after the context has been assigned and fiddled with, in which case
 * just tweak it directly.
 */
void
fpsetcw(uint16_t fcw, uint32_t mxcsr)
{
	struct fpu_ctx *fp = &curthread->t_lwp->lwp_pcb.pcb_fpu;
	struct fxsave_state *fx;

	if (!fpu_exists || fp_kind == FP_NO)
		return;

	if ((fp->fpu_flags & FPU_EN) == 0) {
		if (fcw == FPU_CW_INIT && mxcsr == SSE_MXCSR_INIT) {
			/*
			 * Common case. Floating point unit not yet
			 * enabled, and kernel already intends to initialize
			 * the hardware the way the caller wants.
			 */
			return;
		}
		/*
		 * Hmm. Userland wants a different default.
		 * Do a fake "first trap" to establish the context, then
		 * handle as if we already had a context before we came in.
		 */
		kpreempt_disable();
		fp_seed();
		kpreempt_enable();
	}

	/*
	 * Ensure that the current hardware state is flushed back to the
	 * pcb, then modify that copy. Next use of the fp will
	 * restore the context.
	 */
	fp_save(fp);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		fx = fp->fpu_regs.kfpu_u.kfpu_fx;
		fx->fx_fcw = fcw;
		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
		break;

	case FP_XSAVE:
		fx = &fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave;
		fx->fx_fcw = fcw;
		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
		/*
		 * Always set LEGACY_FP as it may have been cleared by the
		 * XSAVE instruction.
		 */
		fp->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |=
		    XFEATURE_LEGACY_FP;
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}
}

static void
kernel_fpu_fpstate_init(kfpu_state_t *kfpu)
{
	struct xsave_state *xs;

	switch (fp_save_mech) {
	case FP_FXSAVE:
		bcopy(&sse_initial, kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_fx,
		    sizeof (struct fxsave_state));
		kfpu->kfpu_ctx.fpu_xsave_mask = 0;
		break;
	case FP_XSAVE:
		xs = kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_xs;
		bzero(xs, cpuid_get_xsave_size());
		bcopy(&avx_initial, xs, sizeof (*xs));
		xs->xs_header.xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
		kfpu->kfpu_ctx.fpu_xsave_mask = XFEATURE_FP_ALL;
		break;
	default:
		panic("invalid fp_save_mech");
	}

	/*
	 * Set the corresponding flags that the system expects on the FPU state
	 * to indicate that this is our state. The FPU_EN flag is required to
	 * indicate that FPU usage is allowed. The FPU_KERNEL flag is
	 * explicitly not set below as it represents that this state is being
	 * suppressed by the kernel.
	 */
	kfpu->kfpu_ctx.fpu_flags = FPU_EN | FPU_VALID;
	kfpu->kfpu_flags |= KFPU_F_INITIALIZED;
}

kfpu_state_t *
kernel_fpu_alloc(int kmflags)
{
	kfpu_state_t *kfpu;

	if ((kfpu = kmem_zalloc(sizeof (kfpu_state_t), kmflags)) == NULL) {
		return (NULL);
	}

	kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_generic =
	    kmem_cache_alloc(fpsave_cachep, kmflags);
	if (kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_generic == NULL) {
		kmem_free(kfpu, sizeof (kfpu_state_t));
		return (NULL);
	}

	kernel_fpu_fpstate_init(kfpu);

	return (kfpu);
}

void
kernel_fpu_free(kfpu_state_t *kfpu)
{
	kmem_cache_free(fpsave_cachep,
	    kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_generic);
	kmem_free(kfpu, sizeof (kfpu_state_t));
}

static void
kernel_fpu_ctx_save(void *arg)
{
	kfpu_state_t *kfpu = arg;
	fpu_ctx_t *pf;

	if (kfpu == NULL) {
		/*
		 * A NULL kfpu implies this is a kernel thread with an LWP and
		 * no user-level FPU usage. Use the lwp fpu save area.
		 */
		pf = &curthread->t_lwp->lwp_pcb.pcb_fpu;

		ASSERT(curthread->t_procp->p_flag & SSYS);
		ASSERT3U(pf->fpu_flags & FPU_VALID, ==, 0);

		fp_save(pf);
	} else {
		pf = &kfpu->kfpu_ctx;

		ASSERT3P(kfpu->kfpu_curthread, ==, curthread);
		ASSERT3U(pf->fpu_flags & FPU_VALID, ==, 0);

		/*
		 * Note, we can't use fp_save because it assumes that we're
		 * saving to the thread's PCB and not somewhere else. Because
		 * this is a different FPU context, we instead have to do this
		 * ourselves.
		 */
		switch (fp_save_mech) {
		case FP_FXSAVE:
			fpxsave(pf->fpu_regs.kfpu_u.kfpu_fx);
			break;
		case FP_XSAVE:
			xsavep(pf->fpu_regs.kfpu_u.kfpu_xs, pf->fpu_xsave_mask);
			break;
		default:
			panic("Invalid fp_save_mech");
		}

		/*
		 * Because we have saved context here, our save state is no
		 * longer valid and therefore needs to be reinitialized.
		 */
		kfpu->kfpu_flags &= ~KFPU_F_INITIALIZED;
	}

	pf->fpu_flags |= FPU_VALID;

	/*
	 * Clear the T_KFPU flag. This allows swtch to check for improper
	 * kernel usage of the FPU (i.e. switching to a new thread while the
	 * old thread was in the kernel and using the FPU, but did not perform
	 * a context save).
	 */
	curthread->t_flag &= ~T_KFPU;
}

static void
kernel_fpu_ctx_restore(void *arg)
{
	kfpu_state_t *kfpu = arg;
	fpu_ctx_t *pf;

	if (kfpu == NULL) {
		/*
		 * A NULL kfpu implies this is a kernel thread with an LWP and
		 * no user-level FPU usage. Use the lwp fpu save area.
		 */
		pf = &curthread->t_lwp->lwp_pcb.pcb_fpu;

		ASSERT(curthread->t_procp->p_flag & SSYS);
		ASSERT3U(pf->fpu_flags & FPU_VALID, !=, 0);
	} else {
		pf = &kfpu->kfpu_ctx;

		ASSERT3P(kfpu->kfpu_curthread, ==, curthread);
		ASSERT3U(pf->fpu_flags & FPU_VALID, !=, 0);
	}

	fp_restore(pf);
	curthread->t_flag |= T_KFPU;
}

/*
 * Validate that the thread is not switching off-cpu while actively using the
 * FPU within the kernel.
 */
void
kernel_fpu_no_swtch(void)
{
	if ((curthread->t_flag & T_KFPU) != 0) {
		panic("curthread swtch-ing while the kernel is using the FPU");
	}
}

static const struct ctxop_template kfpu_ctxop_tpl = {
	.ct_rev		= CTXOP_TPL_REV,
	.ct_save	= kernel_fpu_ctx_save,
	.ct_restore	= kernel_fpu_ctx_restore,
};
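
/*
 * Illustrative sketch, not compiled into this file: once attached, the
 * template above arranges for kernel_fpu_ctx_save() to run when the thread
 * goes off-cpu and kernel_fpu_ctx_restore() to run when it comes back on,
 * with the kfpu pointer (or NULL for the LWP-backed case) passed as the
 * argument. The helper below is a hypothetical reduction of the lifecycle
 * that kernel_fpu_begin() and kernel_fpu_end() implement.
 */
#if 0
static void
example_kfpu_ctxop_lifecycle(kfpu_state_t *kfpu)
{
	struct ctxop *ctx;

	/* Allocate before disabling preemption; the allocation may sleep. */
	ctx = ctxop_allocate(&kfpu_ctxop_tpl, kfpu);

	kpreempt_disable();
	ctxop_attach(curthread, ctx);	/* save/restore hooks now armed */
	kpreempt_enable();

	/* ... any off-cpu event now saves, and resuming restores ... */

	ctxop_remove(curthread, &kfpu_ctxop_tpl, kfpu);
}
#endif
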
void
kernel_fpu_begin(kfpu_state_t *kfpu, uint_t flags)
{
	klwp_t *pl = curthread->t_lwp;
	struct ctxop *ctx;

	if ((curthread->t_flag & T_KFPU) != 0) {
		panic("curthread attempting to nest kernel FPU states");
	}

	/* KFPU_USE_LWP and KFPU_NO_STATE are mutually exclusive. */
	ASSERT((flags & (KFPU_USE_LWP | KFPU_NO_STATE)) !=
	    (KFPU_USE_LWP | KFPU_NO_STATE));

	if ((flags & KFPU_NO_STATE) == KFPU_NO_STATE) {
		/*
		 * Since we don't have a kfpu_state or usable lwp pcb_fpu to
		 * hold our kernel FPU context, we depend on the caller doing
		 * kpreempt_disable for the duration of our FPU usage. This
		 * should only be done for very short periods of time.
		 */
		ASSERT(curthread->t_preempt > 0);
		ASSERT(kfpu == NULL);

		if (pl != NULL) {
			/*
			 * We might have already saved once so FPU_VALID could
			 * be set. This is handled in fp_save.
			 */
			fp_save(&pl->lwp_pcb.pcb_fpu);
			pl->lwp_pcb.pcb_fpu.fpu_flags |= FPU_KERNEL;
		}

		curthread->t_flag |= T_KFPU;

		/* Always restore the fpu to the initial state. */
		fpinit();

		return;
	}

	/*
	 * We either have a kfpu, or are using the LWP pcb_fpu for context ops.
	 */

	if ((flags & KFPU_USE_LWP) == 0) {
		if (kfpu->kfpu_curthread != NULL)
			panic("attempting to reuse kernel FPU state at %p "
			    "when another thread is already using it", kfpu);

		if ((kfpu->kfpu_flags & KFPU_F_INITIALIZED) == 0)
			kernel_fpu_fpstate_init(kfpu);

		kfpu->kfpu_curthread = curthread;
	}

	/*
	 * Not all threads may have an active LWP. If they do and we're not
	 * going to re-use the LWP, then we should go ahead and save the state.
	 * We must also note that the fpu is now being used by the kernel and
	 * therefore we do not want to manage the fpu state via the user-level
	 * thread's context handlers.
	 *
	 * We might have already saved once (due to a prior use of the kernel
	 * FPU or another code path) so FPU_VALID could be set. This is handled
	 * by fp_save, as is the FPU_EN check.
	 */
	ctx = ctxop_allocate(&kfpu_ctxop_tpl, kfpu);
	kpreempt_disable();
	if (pl != NULL) {
		if ((flags & KFPU_USE_LWP) == 0)
			fp_save(&pl->lwp_pcb.pcb_fpu);
		pl->lwp_pcb.pcb_fpu.fpu_flags |= FPU_KERNEL;
	}

	/*
	 * Set the context operations for kernel FPU usage. Because kernel FPU
	 * setup and ctxop attachment need to happen under the protection of
	 * kpreempt_disable(), we allocate the ctxop outside the guard so that
	 * its sleeping allocation will not cause a voluntary swtch(). This
	 * allows the rest of the initialization to proceed, ensuring valid
	 * state for the ctxop handlers.
	 */
	ctxop_attach(curthread, ctx);
	curthread->t_flag |= T_KFPU;

	if ((flags & KFPU_USE_LWP) == KFPU_USE_LWP) {
		/*
		 * For pure kernel threads with an LWP, we can use the LWP's
		 * pcb_fpu to save/restore context.
		 */
		fpu_ctx_t *pf = &pl->lwp_pcb.pcb_fpu;

		VERIFY(curthread->t_procp->p_flag & SSYS);
		VERIFY(kfpu == NULL);
		ASSERT((pf->fpu_flags & FPU_EN) == 0);

		/* Always restore the fpu to the initial state. */
		if (fp_save_mech == FP_XSAVE)
			pf->fpu_xsave_mask = XFEATURE_FP_ALL;
		fpinit();
		pf->fpu_flags = FPU_EN | FPU_KERNEL;
	} else {
		/* initialize the kfpu state */
		kernel_fpu_ctx_restore(kfpu);
	}
	kpreempt_enable();
}

void
kernel_fpu_end(kfpu_state_t *kfpu, uint_t flags)
{
	if ((curthread->t_flag & T_KFPU) == 0) {
		panic("curthread attempting to clear kernel FPU state "
		    "without using it");
	}

	/*
	 * General comments on why the rest of this function is structured the
	 * way it is. Be aware that there is a lot of subtlety here.
	 *
	 * If a user-level thread ever uses the fpu while in the kernel, then
	 * we cannot call fpdisable since that does STTS. That will set the
	 * TS bit in %cr0 which will cause an exception if anything touches the
	 * fpu. However, the user-level context switch handler (fpsave_ctxt)
	 * needs to access the fpu to save the registers into the pcb.
	 * fpsave_ctxt relies on CLTS having been done to clear the TS bit in
	 * fprestore_ctxt when the thread context switched onto the CPU.
	 *
	 * Calling fpdisable only affects the current CPU's %cr0 register.
	 *
	 * During ctxop_remove and kpreempt_enable, we can voluntarily context
	 * switch, so the CPU we were on when we entered this function might
	 * not be the same one we're on when we return from ctxop_remove or end
	 * the function. Note there can be user-level context switch handlers
	 * still installed if this is a user-level thread.
	 *
	 * We also must be careful in the unlikely event that we're running in
	 * an interrupt thread, since we can't leave the CPU's %cr0 TS state
	 * set incorrectly for the "real" thread to resume on this CPU.
	 */

	if ((flags & KFPU_NO_STATE) == 0) {
		kpreempt_disable();
	} else {
		ASSERT(curthread->t_preempt > 0);
	}

	curthread->t_flag &= ~T_KFPU;

	/*
	 * When we are ending things, we explicitly don't save the current
	 * kernel FPU state back to the temporary state. The kfpu API is not
	 * intended to be a permanent save location.
	 *
	 * If this is a user-level thread and we were to context switch
	 * before returning to user-land, fpsave_ctxt will be a no-op since we
	 * already saved the user-level FPU state the first time we ran
	 * kernel_fpu_begin (i.e. we won't save the bad kernel fpu state over
	 * the user-level fpu state). The fpsave_ctxt functions only save if
	 * FPU_VALID is not already set. fp_save also sets PCB_SET_UPDATE_FPU
	 * so fprestore_ctxt will be done in sys_rtt_common when the thread
	 * finally returns to user-land.
	 */

	if ((curthread->t_procp->p_flag & SSYS) != 0 &&
	    curthread->t_intr == NULL) {
		/*
		 * This is a kernel thread that is not an interrupt thread, so
		 * we can STTS now.
		 */
		fpdisable();
	}

	if ((flags & KFPU_NO_STATE) == 0) {
		ctxop_remove(curthread, &kfpu_ctxop_tpl, kfpu);

		if (kfpu != NULL) {
			if (kfpu->kfpu_curthread != curthread) {
				panic("attempting to end kernel FPU state "
				    "for %p, but active thread is not "
				    "curthread", kfpu);
			} else {
				kfpu->kfpu_curthread = NULL;
			}
		}

		kpreempt_enable();
	}

	if (curthread->t_lwp != NULL) {
		uint_t f;

		if (flags & KFPU_USE_LWP) {
			f = FPU_EN | FPU_KERNEL;
		} else {
			f = FPU_KERNEL;
		}
		curthread->t_lwp->lwp_pcb.pcb_fpu.fpu_flags &= ~f;
	}
}
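
/*
 * Illustrative sketch, not compiled into this file: the expected pairing of
 * the kernel FPU interfaces defined above. The worker function and its SIMD
 * payload are hypothetical; the alloc/begin/end/free calls and the
 * KFPU_NO_STATE variant mirror the requirements documented in
 * kernel_fpu_begin() and kernel_fpu_end().
 */
#if 0
static void
example_kernel_fpu_consumer(void)
{
	kfpu_state_t *kfpu;

	/* Long-running usage: private state that survives context switches. */
	if ((kfpu = kernel_fpu_alloc(KM_SLEEP)) == NULL)
		return;
	kernel_fpu_begin(kfpu, 0);
	/* ... use %xmm/%ymm/%zmm registers here ... */
	kernel_fpu_end(kfpu, 0);
	kernel_fpu_free(kfpu);

	/*
	 * Short, non-sleeping usage: no backing state, so the caller must
	 * hold kpreempt_disable() across the entire FPU section.
	 */
	kpreempt_disable();
	kernel_fpu_begin(NULL, KFPU_NO_STATE);
	/* ... brief FPU work, no voluntary context switch ... */
	kernel_fpu_end(NULL, KFPU_NO_STATE);
	kpreempt_enable();
}
#endif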