/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2021 Joyent, Inc.
 * Copyright 2021 RackTop Systems, Inc.
 * Copyright 2022 Oxide Computer Company
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
/*		All Rights Reserved				*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
/*		All Rights Reserved				*/

/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/signal.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/trap.h>
#include <sys/fault.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/pcb.h>
#include <sys/lwp.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/disp.h>
#include <sys/fp.h>
#include <sys/siginfo.h>
#include <sys/archsystm.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/x86_archext.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/kfpu.h>

/*
 * FPU Management Overview
 * -----------------------
 *
 * The x86 FPU has evolved substantially since its days as the x87 coprocessor;
 * however, many aspects of its life as a coprocessor are still around in x86.
 *
 * Today, when we refer to the 'FPU', we don't just mean the original x87 FPU.
 * While that state still exists, there is much more that is covered by the FPU.
 * Today, this includes not just traditional FPU state, but also supervisor only
 * state. The following state is currently managed and covered logically by the
 * idea of the FPU registers:
 *
 *    o Traditional x87 FPU
 *    o Vector Registers (%xmm, %ymm, %zmm)
 *    o Memory Protection Extensions (MPX) Bounds Registers
 *    o Protection Key Rights Register (PKRU)
 *    o Processor Trace data
 *
 * The rest of this covers how the FPU is managed and controlled, how state is
 * saved and restored between threads, interactions with hypervisors, and other
 * information exported to user land through aux vectors. A lot of background
 * information is here to synthesize major parts of the Intel SDM, but
 * unfortunately, it is not a replacement for reading it.
 *
 * FPU Control Registers
 * ---------------------
 *
 * Because the x87 FPU began its life as a co-processor and the FPU was
 * optional, there are several bits in %cr0 that we have to manipulate when
 * dealing with the FPU. These are:
 *
 *   o CR0.ET	The 'extension type' bit. This was used originally to indicate
 *		that the FPU co-processor was present. Now it is forced on for
 *		compatibility. This is often used to verify whether or not the
 *		FPU is present.
 *
 *   o CR0.NE	The 'native error' bit. Used to indicate that native error
 *		mode should be enabled. This indicates that we should take traps
 *		on FPU errors. The OS enables this early in boot.
 *
 *   o CR0.MP	The 'Monitor Coprocessor' bit. Used to control whether or not
 *		wait/fwait instructions generate a #NM if CR0.TS is set.
 *
 *   o CR0.EM	The 'Emulation' bit. This is used to cause floating point
 *		operations (x87 through SSE4) to trap with a #UD so they can be
 *		emulated. The system never sets this bit, but makes sure it is
 *		clear on processor start up.
 *
 *   o CR0.TS	The 'Task Switched' bit. When this is turned on, a floating
 *		point operation will generate a #NM. An fwait will as well,
 *		depending on the value in CR0.MP.
 *
 * Our general policy is that CR0.ET, CR0.NE, and CR0.MP are always set by
 * the system. Similarly CR0.EM is always unset by the system. CR0.TS has a more
 * complicated role. Historically it has been used to allow running systems to
 * restore the FPU registers lazily. This will be discussed in greater depth
 * later on.
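 *
 * As a hedged sketch of the CR0.TS mechanics (assuming the getcr0()/setcr0()
 * helpers from <sys/archsystm.h> and the CR0_TS bit definition from
 * <sys/controlregs.h>), arming the trap looks roughly like:
 *
 *	setcr0(getcr0() | CR0_TS);
 *
 * After this, the next FPU instruction raises #NM; the handler (or an eager
 * restore path) clears TS again, e.g. via the clts instruction, before any
 * FPU state is reloaded.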
 *
 * %cr4 is also used as part of the FPU control. Specifically we need to worry
 * about the following bits in the system:
 *
 *   o CR4.OSFXSR	This bit is used to indicate that the OS understands and
 *			supports the execution of the fxsave and fxrstor
 *			instructions. This bit is required to be set to enable
 *			the use of the SSE->SSE4 instructions.
 *
 *   o CR4.OSXMMEXCPT	This bit is used to indicate that the OS can understand
 *			and take a SIMD floating point exception (#XM). This bit
 *			is always enabled by the system.
 *
 *   o CR4.OSXSAVE	This bit is used to indicate that the OS understands and
 *			supports the execution of the xsave and xrstor family of
 *			instructions. This bit is required to use any of the AVX
 *			and newer feature sets.
 *
 * Because all supported processors are 64-bit, they'll always support the XMM
 * extensions and we will enable both CR4.OSFXSR and CR4.OSXMMEXCPT in boot.
 * CR4.OSXSAVE will be enabled and used whenever xsave is reported in cpuid.
 *
 * %xcr0 is used to manage the behavior of the xsave feature set and is only
 * present on the system if xsave is supported. It is read and written via the
 * xgetbv and xsetbv instructions. Each bit in %xcr0 refers to a different
 * component of the xsave state and controls whether or not that information is
 * saved and restored. For newer feature sets like AVX and MPX, it also
 * controls whether or not the corresponding instructions can be executed (much
 * like CR4.OSFXSR does for the SSE feature sets).
 *
 * Everything in %xcr0 concerns features available to user land. There is also
 * the IA32_XSS MSR which is used to control supervisor-only features that are
 * still part of the xsave state. Bits that can be set in %xcr0 are reserved in
 * IA32_XSS and vice versa. This is an important property that is particularly
 * relevant to how the xsave instructions operate.
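 *
 * As a hedged illustration (get_xcr() is real and used later in this file; a
 * symmetric set_xcr() is assumed here purely for the sketch), enabling the
 * AVX component in %xcr0 would look like:
 *
 *	uint64_t xcr0 = get_xcr(XFEATURE_ENABLED_MASK);
 *	set_xcr(XFEATURE_ENABLED_MASK, xcr0 | XFEATURE_AVX);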
 *
 * Save Mechanisms
 * ---------------
 *
 * When switching between running threads the FPU state needs to be saved and
 * restored by the OS. If this state was not saved, users would rightfully
 * complain about corrupt state. There are three mechanisms that exist on the
 * processor for saving and restoring these state images:
 *
 *   o fsave
 *   o fxsave
 *   o xsave
 *
 * fsave saves and restores only the x87 FPU and is the oldest of these
 * mechanisms. This mechanism is never used in the kernel today because we are
 * always running on systems that support fxsave.
 *
 * The fxsave and fxrstor mechanism allows the x87 FPU and the SSE register
 * state to be saved and restored to and from a struct fxsave_state. This is the
 * default mechanism that is used to save and restore the FPU on amd64. An
 * important way in which fxsave differs from the original i386 fsave mechanism
 * is that restoring FPU state with pending exceptions will not generate an
 * exception; it will be deferred to the next use of the FPU.
 *
 * The final and by far the most complex mechanism is that of the xsave set.
 * xsave allows for saving and restoring all of the traditional x86 pieces (x87
 * and SSE), while allowing for extensions that will save the %ymm, %zmm, etc.
 * registers.
 *
 * Data is saved and restored into and out of a struct xsave_state. The first
 * part of the struct xsave_state is equivalent to the struct fxsave_state.
 * After that, there is a header which is used to describe the remaining
 * portions of the state. The header is a 64-byte value of which the first two
 * uint64_t values are defined and the rest are reserved and must be zero. The
 * first uint64_t is the xstate_bv member. This describes which values in the
 * xsave_state are actually valid and present. This is updated on a save and
 * used on restore. The second member is the xcomp_bv member. Its last bit
 * determines whether or not a compressed version of the structure is used.
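 *
 * An abbreviated layout sketch of the uncompressed form (the field names
 * match the initializers used later in this file; sizes per the Intel SDM):
 *
 *	struct xsave_state {
 *		struct fxsave_state	xs_fxsave;	- legacy x87/SSE, 512 bytes
 *		struct xsave_header	xs_header;	- 64-byte header
 *		(extended components such as %ymm follow at fixed offsets)
 *	};
 *
 *	struct xsave_header {
 *		uint64_t	xsh_xstate_bv;	- which components are valid
 *		uint64_t	xsh_xcomp_bv;	- bit 63: compressed format
 *		uint64_t	xsh_reserved[6];	- must be zero
 *	};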
 *
 * When the uncompressed structure is used (currently the only format we
 * support), then each state component is at a fixed offset in the structure,
 * even if it is not being used. For example, if you only saved the AVX related
 * state, but did not save the MPX related state, the offset would not change
 * for any component. With the compressed format, components that aren't used
 * are all elided (though the x87 and SSE state are always there).
 *
 * Unlike fxsave which saves all state, the xsave family does not always save
 * and restore all the state that could be covered by the xsave_state. The
 * instructions all take an argument which is a mask of what to consider. This
 * is the same mask that will be used in the xstate_bv vector and the same
 * values that are present in %xcr0 and IA32_XSS, though IA32_XSS is only
 * considered by the xsaves and xrstors instructions.
 *
 * When a save or restore is requested, a bitwise AND is performed between the
 * requested bits and those that have been enabled in %xcr0. Only the bits that
 * match are then saved or restored. Others will be silently ignored by
 * the processor. This idea is used often in the OS. We will always request that
 * we save and restore all of the state, but only those portions that are
 * actually enabled in %xcr0 will be touched.
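 *
 * For example, fp_save() below always passes the thread's full request mask
 * and lets the hardware narrow it:
 *
 *	xsavep(fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
 *
 * fpu_xsave_mask is set to XFEATURE_FP_ALL, but only the bits also enabled in
 * %xcr0 are actually written out.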
 *
 * If a feature has been asked to be restored that is not set in the xstate_bv
 * feature vector of the save state, then it will be set to its initial state by
 * the processor (usually zeros). Also, when asked to save state, the processor
 * may not write out data that is in its initial state as an optimization. This
 * optimization only applies to saving data and not to restoring data.
 *
 * There are a few different variants of the xsave and xrstor instruction. They
 * are:
 *
 *   o xsave	This is the original save instruction. It will save all of the
 *		requested data in the xsave state structure. It only saves data
 *		in the uncompressed (xcomp_bv[63] is zero) format. It may be
 *		executed at all privilege levels.
 *
 *   o xrstor	This is the original restore instruction. It will restore all of
 *		the requested data. The xrstor instruction can handle both the
 *		compressed and uncompressed formats. It may be executed at all
 *		privilege levels.
 *
 *   o xsaveopt	This is a variant of the xsave instruction that employs
 *		optimizations to try and only write out state that has been
 *		modified since the last time an xrstor instruction was called.
 *		The processor tracks a tuple of information about the last
 *		xrstor and tries to ensure that the same buffer is being used
 *		when this optimization is applied. However, because it tracks
 *		the xrstor buffer by its address, it is not suitable for use if
 *		that buffer can be easily reused. The most common case is trying
 *		to save data to the stack in rtld. It may be executed at all
 *		privilege levels.
 *
 *   o xsavec	This is a variant of the xsave instruction that writes out the
 *		compressed form of the xsave_state. Otherwise it behaves as
 *		xsave. It may be executed at all privilege levels.
 *
 *   o xsaves	This is a variant of the xsave instruction. It is similar to
 *		xsavec in that it always writes the compressed form of the
 *		buffer. Unlike all the other forms, this instruction looks at
 *		both the user (%xcr0) and supervisor (IA32_XSS MSR) masks to
 *		determine what to save and restore. xsaves also implements the
 *		same modified-state optimization that xsaveopt does. User land
 *		may not execute the instruction.
 *
 *   o xrstors	This is a variant of the xrstor instruction. Similar to xsaves
 *		it can save and restore both the user and privileged states.
 *		Unlike xrstor it can only operate on the compressed form.
 *		User land may not execute the instruction.
 *
 * Based on all of these, the kernel has a precedence for what it will use.
 * Basically, xsaves (which we do not currently support) would be preferred to
 * xsaveopt, which is preferred to xsave. A similar scheme is used when
 * informing rtld (more on that later) about what it should use: xsavec is
 * preferred to xsave, and xsaveopt is not recommended because its modified
 * optimization is not appropriate for that use.
 *
 * Finally, there is one last gotcha with the xsave state. Importantly, some
 * AMD processors did not always save and restore some of the FPU exception
 * state in the cases where Intel did. In those cases the OS will make up for
 * this fact itself.
 *
 * FPU Initialization
 * ------------------
 *
 * One difference with the FPU registers is that not all threads have FPU
 * state; only those that have an lwp do. Generally this means kernel threads,
 * which all share p0 and its lwp, do not have FPU state, though there are
 * definitely exceptions such as kcfpoold. In the rest of this discussion we'll
 * use thread and lwp interchangeably; just think of thread meaning a thread
 * that has an lwp.
 *
 * Each lwp has its FPU state allocated in its pcb (process control block). The
 * actual storage comes from the fpsave_cachep kmem cache. This cache is sized
 * dynamically at start up based on the save mechanism that we're using and the
 * amount of memory required for it. This is dynamic because the xsave_state
 * size varies based on the supported feature set.
 *
 * The hardware side of the FPU is initialized early in boot before we mount the
 * root file system. This is effectively done in fpu_probe(). This is where we
 * make the final decision about what the save and restore mechanisms we should
 * use are, create the fpsave_cachep kmem cache, and initialize a number of
 * function pointers that implement the save and restore logic.
 *
 * The thread/lwp side is a little more involved. There are two different
 * things that we need to concern ourselves with. The first is how the FPU
 * resources are allocated and the second is how the FPU state is initialized
 * for a given lwp.
 *
 * We allocate the FPU save state from our kmem cache as part of lwp_fp_init().
 * This is always called unconditionally by the system as part of creating an
 * LWP.
 *
 * There are three different initialization paths that we deal with. The first
 * is when we are executing a new process. As part of exec all of the register
 * state is reset. The exec case is particularly important because init is born
 * like Athena, sprouting from the head of the kernel, without any true parent
 * to fork from. The second is used whenever we fork or create a new lwp. The
 * third is to deal with special lwps like the agent lwp.
 *
 * During exec, we will call fp_exec() which will initialize and set up the FPU
 * state for the process. That will fill in the initial state for the FPU and
 * also set that state in the FPU itself. As part of fp_exec() we also install a
 * thread context operations vector that takes care of dealing with the saving
 * and restoring of the FPU. These context handlers will also be called whenever
 * an lwp is created or forked. In those cases, to initialize the FPU we will
 * call fp_new_lwp(). Like fp_exec(), fp_new_lwp() will install a context
 * operations vector for the new thread.
 *
 * Next we'll end up in the context operation fp_new_lwp(). This saves the
 * current thread's state, initializes the new thread's state, and copies over
 * the relevant parts of the originating thread's state. It's at this point that
 * we also install the FPU context operations into the new thread, which ensures
 * that all future threads that are descendants of the current one get the
 * thread context operations (unless they call exec).
 *
 * To deal with some things like the agent lwp, we double check the state of the
 * FPU in sys_rtt_common() to make sure that it has been enabled before
 * returning to user land. In general, this path should be rare, but it's useful
 * for the odd lwp here and there.
 *
 * The FPU state will remain valid most of the time. There are times that
 * the state will be rewritten, for example in restorecontext, due to /proc, or
 * when the lwp calls exec(). Whether the context is being freed or we are
 * resetting the state, we will call fp_free() to disable the FPU and our
 * context.
 *
 * Finally, when the lwp is destroyed, it will actually destroy and free the FPU
 * state by calling fp_lwp_cleanup().
 *
 * Kernel FPU Multiplexing
 * -----------------------
 *
 * Just as the kernel has to maintain all of the general purpose registers when
 * switching between scheduled threads, the same is true of the FPU registers.
 *
 * When a thread has FPU state, it also has a set of context operations
 * installed. These context operations take care of making sure that the FPU is
 * properly saved and restored during a context switch (fpsave_ctxt and
 * fprestore_ctxt respectively). This means that the current implementation of
 * the FPU is 'eager': when a thread is running, the CPU will have its FPU state
 * loaded. While this is always true when executing in userland, there are a few
 * cases where this is not true in the kernel.
 *
 * This was not always the case. Traditionally on x86 a 'lazy' FPU restore was
 * employed. This meant that the FPU would be saved on a context switch and the
 * CR0.TS bit would be set. When a thread next tried to use the FPU, it would
 * then take a #NM trap, at which point we would restore the FPU from the save
 * area and return to user land. Given how frequently libc alone uses the FPU,
 * there's no point returning to user land just to trap again.
 *
 * There are a few cases though where the FPU state may need to be changed for a
 * thread on its behalf. The most notable cases are in the case of processes
 * using /proc, restorecontext, forking, etc. In all of these cases the kernel
 * will force a thread's FPU state to be saved into the PCB through the
 * fp_save() function. Whenever the FPU is saved, the FPU_VALID flag is set on
 * the pcb. This indicates that the save state holds currently valid data. As a
 * side effect of this, CR0.TS will be set. To make sure that all of the state
 * is updated before returning to user land, in these cases, we set a flag on
 * the PCB that says the FPU needs to be updated. This will make sure that we
 * take the slow path out of a system call to fix things up for the thread. Due
 * to the fact that this is a rather rare case, effectively setting the
 * equivalent of t_postsys is acceptable.
 *
 * CR0.TS will be set after a save occurs and cleared when a restore occurs.
 * Generally this means it will be cleared immediately by the new thread that is
 * running in a context switch. However, this isn't the case for kernel threads.
 * They currently operate with CR0.TS set as no kernel state is restored for
 * them. This means that using the FPU will cause a #NM and panic.
 *
 * The FPU_VALID flag on the currently executing thread's pcb is meant to track
 * what the value of CR0.TS should be. If it is set, then CR0.TS will be set.
 * However, because we eagerly restore, the only time that CR0.TS should be set
 * for a non-kernel thread is during operations where it will be cleared before
 * returning to user land; importantly, the only data in the FPU at that point
 * is the thread's own.
 *
 * Kernel FPU Usage
 * ----------------
 *
 * Traditionally the kernel never used the FPU since it had no need for
 * floating point operations. However, modern FPU hardware supports a variety
 * of SIMD extensions which can speed up code such as parity calculations or
 * encryption.
 *
 * To allow the kernel to take advantage of these features, the
 * kernel_fpu_begin() and kernel_fpu_end() functions should be wrapped
 * around any usage of the FPU by the kernel to ensure that user-level context
 * is properly saved/restored, as well as to properly set up the FPU for use by
 * the kernel. There are a variety of ways this wrapping can be used, as
 * discussed below.
 *
 * When kernel_fpu_begin() and kernel_fpu_end() are used for extended
 * operations, the kernel_fpu_alloc() function should be used to allocate a
 * kfpu_state_t structure that is used to save/restore the thread's kernel FPU
 * state. This structure is not tied to any thread. That is, different threads
 * can reuse the same kfpu_state_t structure, although not concurrently. A
 * kfpu_state_t structure is freed by the kernel_fpu_free() function.
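 *
 * A hedged usage sketch of the extended-operation pattern (assuming that
 * kernel_fpu_alloc() takes a KM_SLEEP-style kmflag and that a zero flags
 * argument means no special behavior):
 *
 *	kfpu_state_t *kfs = kernel_fpu_alloc(KM_SLEEP);
 *	kernel_fpu_begin(kfs, 0);
 *	... extended SIMD work; may be context switched ...
 *	kernel_fpu_end(kfs, 0);
 *	kernel_fpu_free(kfs);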
 *
 * In some cases, the kernel may need to use the FPU for a short operation
 * without the overhead to manage a kfpu_state_t structure and without
 * allowing for a context switch off the FPU. In this case the KFPU_NO_STATE
 * bit can be set in the kernel_fpu_begin() and kernel_fpu_end() flags
 * parameter. This indicates that there is no kfpu_state_t. When used this way,
 * kernel preemption should be disabled by the caller (kpreempt_disable) before
 * calling kernel_fpu_begin(), and re-enabled after calling kernel_fpu_end().
 * For this usage, it is important to limit the kernel's FPU use to short
 * operations. The tradeoff between using the FPU without a kfpu_state_t
 * structure vs. the overhead of allowing a context switch while using the FPU
 * should be carefully considered on a case-by-case basis.
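 *
 * A hedged sketch of that short-operation pattern:
 *
 *	kpreempt_disable();
 *	kernel_fpu_begin(NULL, KFPU_NO_STATE);
 *	... a few SIMD instructions; no blocking, no preemption ...
 *	kernel_fpu_end(NULL, KFPU_NO_STATE);
 *	kpreempt_enable();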
 *
 * In other cases, kernel threads have an LWP, but never execute in user space.
 * In this situation, the LWP's pcb_fpu area can be used to save/restore the
 * kernel's FPU state if the thread is context switched, instead of having to
 * allocate and manage a kfpu_state_t structure. The KFPU_USE_LWP bit in the
 * kernel_fpu_begin() and kernel_fpu_end() flags parameter is used to
 * enable this behavior. It is the caller's responsibility to ensure that this
 * is only used for a kernel thread which never executes in user space.
 *
 * FPU Exceptions
 * --------------
 *
 * Certain operations can cause the kernel to take traps due to FPU activity.
 * Generally these events will cause a user process to receive a SIGFPE and if
 * the kernel receives it in kernel context, we will die. Traditionally the #NM
 * (Device Not Available / No Math) exception generated by CR0.TS would have
 * caused us to restore the FPU. Now it is a fatal event regardless of whether
 * or not user land causes it.
 *
 * While there are some cases where the kernel uses the FPU, it is up to the
 * kernel to use the FPU in a way such that it cannot receive a trap or to use
 * the appropriate trap protection mechanisms.
 *
 * Hypervisors
 * -----------
 *
 * When providing support for hypervisors things are a little bit more
 * complicated because the FPU is not virtualized at all. This means that they
 * need to save and restore the FPU and %xcr0 across entry and exit to the
 * guest. To facilitate this, we provide a series of APIs in <sys/hma.h>. These
 * allow us to use the full native state to make sure that we are always saving
 * and restoring the full FPU that the host sees, even when the guest is using a
 * subset.
 *
 * One tricky aspect of this is that the guest may be using a subset of %xcr0
 * and therefore changing our %xcr0 on the fly. It is vital that when we're
 * saving and restoring the FPU we always use the largest %xcr0 contents;
 * otherwise we will end up leaving data behind in the registers.
 *
 * ELF PLT Support
 * ---------------
 *
 * rtld has to preserve a subset of the FPU when it is saving and restoring
 * registers due to the amd64 SYS V ABI. See cmd/sgs/rtld/amd64/boot_elf.s for
 * more information. As a result, we set up an aux vector that contains
 * information about what save and restore mechanisms it should be using and
 * the sizing thereof based on what the kernel supports. This is passed down in
 * a series of aux vectors SUN_AT_FPTYPE and SUN_AT_FPSIZE. This information is
 * initialized in fpu_subr.c.
 */

kmem_cache_t *fpsave_cachep;

/* Legacy fxsave layout + xsave header + ymm */
#define	AVX_XSAVE_SIZE		(512 + 64 + 256)

/*
 * Various sanity checks.
 */
CTASSERT(sizeof (struct fxsave_state) == 512);
CTASSERT(sizeof (struct fnsave_state) == 108);
CTASSERT((offsetof(struct fxsave_state, fx_xmm[0]) & 0xf) == 0);
CTASSERT(sizeof (struct xsave_state) >= AVX_XSAVE_SIZE);

/*
 * This structure is the x86 implementation of the kernel FPU that is defined in
 * uts/common/sys/kfpu.h.
 */

typedef enum kfpu_flags {
	/*
	 * This indicates that the save state has initial FPU data.
	 */
	KFPU_F_INITIALIZED = 0x01
} kfpu_flags_t;

struct kfpu_state {
	fpu_ctx_t	kfpu_ctx;
	kfpu_flags_t	kfpu_flags;
	kthread_t	*kfpu_curthread;
};

/*
 * Initial kfpu state for SSE/SSE2 used by fpinit()
 */
const struct fxsave_state sse_initial = {
	FPU_CW_INIT,	/* fx_fcw */
	0,		/* fx_fsw */
	0,		/* fx_fctw */
	0,		/* fx_fop */
	0,		/* fx_rip */
	0,		/* fx_rdp */
	SSE_MXCSR_INIT	/* fx_mxcsr */
	/* rest of structure is zero */
};

/*
 * Initial kfpu state for AVX used by fpinit()
 */
const struct xsave_state avx_initial = {
	/*
	 * The definition below needs to be identical with sse_initial
	 * defined above.
	 */
	.xs_fxsave = {
		.fx_fcw = FPU_CW_INIT,
		.fx_mxcsr = SSE_MXCSR_INIT,
	},
	.xs_header = {
		/*
		 * bit0 = 1 for XSTATE_BV to indicate that legacy fields are
		 * valid, and CPU should initialize XMM/YMM.
		 */
		.xsh_xstate_bv = 1,
		.xsh_xcomp_bv = 0,
	},
};

/*
 * mxcsr_mask value (possibly reset in fpu_probe); used to avoid
 * the #gp exception caused by setting unsupported bits in the
 * MXCSR register
 */
uint32_t sse_mxcsr_mask = SSE_MXCSR_MASK_DEFAULT;

/*
 * Initial kfpu state for x87 used by fpinit()
 */
const struct fnsave_state x87_initial = {
	FPU_CW_INIT,	/* f_fcw */
	0,		/* __f_ign0 */
	0,		/* f_fsw */
	0,		/* __f_ign1 */
	0xffff,		/* f_ftw */
	/* rest of structure is zero */
};

/*
 * This vector is patched to xsave_ctxt() or xsaveopt_ctxt() if we discover we
 * have an XSAVE-capable chip in fpu_probe.
 */
void (*fpsave_ctxt)(void *) = fpxsave_ctxt;
void (*fprestore_ctxt)(void *) = fpxrestore_ctxt;

/*
 * This function pointer is changed to xsaveopt if the CPU is xsaveopt capable.
 */
void (*xsavep)(struct xsave_state *, uint64_t) = xsave;

static int fpe_sicode(uint_t);
static int fpe_simd_sicode(uint_t);
static void fp_new_lwp(void *, void *);
static void fp_free_ctx(void *, int);

static struct ctxop *
fp_ctxop_allocate(struct fpu_ctx *fp)
{
	const struct ctxop_template tpl = {
		.ct_rev		= CTXOP_TPL_REV,
		.ct_save	= fpsave_ctxt,
		.ct_restore	= fprestore_ctxt,
		.ct_fork	= fp_new_lwp,
		.ct_lwp_create	= fp_new_lwp,
		.ct_free	= fp_free_ctx,
	};
	return (ctxop_allocate(&tpl, fp));
}

/*
 * Copy the state of parent lwp's floating point context into the new lwp.
 * Invoked for both fork() and lwp_create().
 *
 * Note that we inherit -only- the control state (e.g. exception masks,
 * rounding, precision control, etc.); the FPU registers are otherwise
 * reset to their initial state.
 */
static void
fp_new_lwp(void *parent, void *child)
{
	kthread_id_t t = parent, ct = child;
	struct fpu_ctx *fp;		/* parent fpu context */
	struct fpu_ctx *cfp;		/* new fpu context */
	struct fxsave_state *fx, *cfx;
	struct xsave_state *cxs;

	ASSERT(fp_kind != FP_NO);

	fp = &t->t_lwp->lwp_pcb.pcb_fpu;
	cfp = &ct->t_lwp->lwp_pcb.pcb_fpu;

	/*
	 * If the parent FPU state is still in the FPU hw then save it;
	 * conveniently, fp_save() already does this for us nicely.
	 */
	fp_save(fp);

	cfp->fpu_flags = FPU_EN | FPU_VALID;
	cfp->fpu_regs.kfpu_status = 0;
	cfp->fpu_regs.kfpu_xstatus = 0;

	/*
	 * Make sure that the child's FPU is cleaned up and made ready for user
	 * land.
	 */
	PCB_SET_UPDATE_FPU(&ct->t_lwp->lwp_pcb);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		fx = fp->fpu_regs.kfpu_u.kfpu_fx;
		cfx = cfp->fpu_regs.kfpu_u.kfpu_fx;
		bcopy(&sse_initial, cfx, sizeof (*cfx));
		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
		cfx->fx_fcw = fx->fx_fcw;
		break;

	case FP_XSAVE:
		cfp->fpu_xsave_mask = fp->fpu_xsave_mask;

		VERIFY(fp->fpu_regs.kfpu_u.kfpu_xs != NULL);

		fx = &fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave;
		cxs = cfp->fpu_regs.kfpu_u.kfpu_xs;
		cfx = &cxs->xs_fxsave;

		bcopy(&avx_initial, cxs, sizeof (*cxs));
		cfx->fx_mxcsr = fx->fx_mxcsr & ~SSE_MXCSR_EFLAGS;
		cfx->fx_fcw = fx->fx_fcw;
		cxs->xs_header.xsh_xstate_bv |=
		    (get_xcr(XFEATURE_ENABLED_MASK) & XFEATURE_FP_INITIAL);
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}

	/*
	 * Mark that both the parent and child need to have the FPU cleaned up
	 * before returning to user land.
	 */
	ctxop_attach(ct, fp_ctxop_allocate(cfp));
}

/*
 * Free any state associated with floating point context.
 * Fp_free can be called in three cases:
 * 1) from reaper -> thread_free -> freectx -> fp_free
 *	fp context belongs to a thread on deathrow
 *	nothing to do, thread will never be resumed
 *	thread calling ctxfree is reaper
 *
 * 2) from exec -> freectx -> fp_free
 *	fp context belongs to the current thread
 *	must disable fpu, thread calling ctxfree is curthread
 *
 * 3) from restorecontext -> setfpregs -> fp_free
 *	we have a modified context in the memory (lwp->pcb_fpu)
 *	disable fpu and release the fp context for the CPU
 */
void
fp_free(struct fpu_ctx *fp)
{
	ASSERT(fp_kind != FP_NO);

	if (fp->fpu_flags & FPU_VALID)
		return;

	kpreempt_disable();
	/*
	 * We want to do fpsave rather than fpdisable so that we can
	 * keep the fpu_flags as FPU_VALID tracking the CR0_TS bit
	 */
	fp->fpu_flags |= FPU_VALID;
	/* If this is the current thread, disable FP to track FPU_VALID */
	if (curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu) {
		/* Clear errors if any to prevent frstor from complaining */
		(void) fperr_reset();
		if (fp_kind & __FP_SSE)
			(void) fpxerr_reset();
		fpdisable();
	}
	kpreempt_enable();
}

/*
 * Wrapper for freectx to make the types line up for fp_free()
 */
static void
fp_free_ctx(void *arg, int isexec __unused)
{
	fp_free((struct fpu_ctx *)arg);
}

/*
 * Store the floating point state and disable the floating point unit.
 */
void
fp_save(struct fpu_ctx *fp)
{
	ASSERT(fp_kind != FP_NO);

	kpreempt_disable();
	if (!fp || fp->fpu_flags & FPU_VALID ||
	    (fp->fpu_flags & FPU_EN) == 0) {
		kpreempt_enable();
		return;
	}
	ASSERT(curthread->t_lwp && fp == &curthread->t_lwp->lwp_pcb.pcb_fpu);

	switch (fp_save_mech) {
	case FP_FXSAVE:
		fpxsave(fp->fpu_regs.kfpu_u.kfpu_fx);
		break;

	case FP_XSAVE:
		xsavep(fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}

	fp->fpu_flags |= FPU_VALID;

	/*
	 * We save the FPU as part of forking, execing, modifications via /proc,
	 * restorecontext, etc. As such, we need to make sure that we return to
	 * userland with valid state in the FPU. If we're context switched out
	 * before we hit sys_rtt_common() we'll end up having restored the FPU
	 * as part of the context ops operations. The restore logic always makes
	 * sure that FPU_VALID is set before doing a restore so we don't restore
	 * it a second time.
	 */
	PCB_SET_UPDATE_FPU(&curthread->t_lwp->lwp_pcb);

	kpreempt_enable();
}

/*
 * Restore the FPU context for the thread:
 * The possibilities are:
 *	1. No active FPU context: Load the new context into the FPU hw
 *	   and enable the FPU.
 */
void
fp_restore(struct fpu_ctx *fp)
{
	switch (fp_save_mech) {
	case FP_FXSAVE:
		fpxrestore(fp->fpu_regs.kfpu_u.kfpu_fx);
		break;

	case FP_XSAVE:
		xrestore(fp->fpu_regs.kfpu_u.kfpu_xs, fp->fpu_xsave_mask);
		break;
	default:
		panic("Invalid fp_save_mech");
		/*NOTREACHED*/
	}

	fp->fpu_flags &= ~FPU_VALID;
}

/*
 * Reset the FPU such that it is in a valid state for a new thread that is
 * coming out of exec. The FPU will be in a usable state at this point. At this
 * point we know that the FPU state has already been allocated and if this
 * wasn't an init process, then it will have had fp_free() previously called.
 */
void
fp_exec(void)
{
	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

	if (fp_save_mech == FP_XSAVE) {
		fp->fpu_xsave_mask = XFEATURE_FP_ALL;
	}

	struct ctxop *ctx = fp_ctxop_allocate(fp);
	/*
	 * Make sure that we're not preempted in the middle of initializing the
	 * FPU on CPU.
	 */
	kpreempt_disable();
	ctxop_attach(curthread, ctx);
	fpinit();
	fp->fpu_flags = FPU_EN;
	kpreempt_enable();
}


/*
 * Seeds the initial state for the current thread.  The possibilities are:
 *      1. Another process has modified the FPU state before we have done any
 *         initialization: Load the FPU state from the LWP state.
 *      2. The FPU state has not been externally modified:  Load a clean state.
 */
void
fp_seed(void)
{
	struct fpu_ctx *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;

	ASSERT(curthread->t_preempt >= 1);
	ASSERT((fp->fpu_flags & FPU_EN) == 0);

	/*
	 * Always initialize a new context and initialize the hardware.
	 */
	if (fp_save_mech == FP_XSAVE) {
		fp->fpu_xsave_mask = XFEATURE_FP_ALL;
	}

	ctxop_attach(curthread, fp_ctxop_allocate(fp));
	fpinit();

	/*
	 * If FPU_VALID is set, it means someone has modified registers via
	 * /proc.  In this case, restore the current lwp's state.
	 */
	if (fp->fpu_flags & FPU_VALID)
		fp_restore(fp);

	ASSERT((fp->fpu_flags & FPU_VALID) == 0);
	fp->fpu_flags = FPU_EN;
}

/*
 * When using xsave/xrstor, these three functions are used by the lwp code to
 * manage the memory for the xsave area.
 */
void
fp_lwp_init(struct _klwp *lwp)
{
	struct fpu_ctx *fp = &lwp->lwp_pcb.pcb_fpu;

	/*
	 * We keep a copy of the pointer in lwp_fpu so that we can restore the
	 * value in forklwp() after we duplicate the parent's LWP state.
	 */
	lwp->lwp_fpu = fp->fpu_regs.kfpu_u.kfpu_generic =
	    kmem_cache_alloc(fpsave_cachep, KM_SLEEP);

	if (fp_save_mech == FP_XSAVE) {
		/*
		 * We bzero since the fpinit() code path will only
		 * partially initialize the xsave area using avx_initial.
		 */
		ASSERT(cpuid_get_xsave_size() >= sizeof (struct xsave_state));
		bzero(fp->fpu_regs.kfpu_u.kfpu_xs, cpuid_get_xsave_size());
	}
}
880f0089e39SRichard Lowe 
881f0089e39SRichard Lowe void
882f0089e39SRichard Lowe fp_lwp_cleanup(struct _klwp *lwp)
883f0089e39SRichard Lowe {
884f0089e39SRichard Lowe 	struct fpu_ctx *fp = &lwp->lwp_pcb.pcb_fpu;
885f0089e39SRichard Lowe 
886f0089e39SRichard Lowe 	if (fp->fpu_regs.kfpu_u.kfpu_generic != NULL) {
887f0089e39SRichard Lowe 		kmem_cache_free(fpsave_cachep,
888f0089e39SRichard Lowe 		    fp->fpu_regs.kfpu_u.kfpu_generic);
889f0089e39SRichard Lowe 		lwp->lwp_fpu = fp->fpu_regs.kfpu_u.kfpu_generic = NULL;
890f0089e39SRichard Lowe 	}
891f0089e39SRichard Lowe }
892f0089e39SRichard Lowe 
893f0089e39SRichard Lowe /*
894f0089e39SRichard Lowe  * Called during the process of forklwp(). The kfpu_u pointer will have been
895f0089e39SRichard Lowe  * overwritten while copying the parent's LWP structure. We have a valid copy
896f0089e39SRichard Lowe  * stashed in the child's lwp_fpu which we use to restore the correct value.
897f0089e39SRichard Lowe  */
898f0089e39SRichard Lowe void
899f0089e39SRichard Lowe fp_lwp_dup(struct _klwp *lwp)
900f0089e39SRichard Lowe {
901f0089e39SRichard Lowe 	void *xp = lwp->lwp_fpu;
902f0089e39SRichard Lowe 	size_t sz;
903f0089e39SRichard Lowe 
904f0089e39SRichard Lowe 	switch (fp_save_mech) {
905f0089e39SRichard Lowe 	case FP_FXSAVE:
906f0089e39SRichard Lowe 		sz = sizeof (struct fxsave_state);
907f0089e39SRichard Lowe 		break;
908f0089e39SRichard Lowe 	case FP_XSAVE:
909f0089e39SRichard Lowe 		sz = cpuid_get_xsave_size();
910f0089e39SRichard Lowe 		break;
911f0089e39SRichard Lowe 	default:
912f0089e39SRichard Lowe 		panic("Invalid fp_save_mech");
913f0089e39SRichard Lowe 		/*NOTREACHED*/
914f0089e39SRichard Lowe 	}
915f0089e39SRichard Lowe 
916f0089e39SRichard Lowe 	/* copy the parent's values into the new lwp's struct */
917f0089e39SRichard Lowe 	bcopy(lwp->lwp_pcb.pcb_fpu.fpu_regs.kfpu_u.kfpu_generic, xp, sz);
918f0089e39SRichard Lowe 	/* now restore the pointer */
919f0089e39SRichard Lowe 	lwp->lwp_pcb.pcb_fpu.fpu_regs.kfpu_u.kfpu_generic = xp;
920f0089e39SRichard Lowe }
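
/*
 * Illustrative (hypothetical, simplified) forklwp() sequence for the
 * routine above -- copying the parent's LWP state clobbers the child's
 * kfpu_u pointer, and fp_lwp_dup() repairs it from the lwp_fpu stash
 * made by fp_lwp_init():
 *
 *	(copy parent LWP state into the child, clobbering kfpu_u)
 *	fp_lwp_dup(clwp);
 *
 * after which the child's kfpu_u again points at its own save area, now
 * holding a copy of the parent's FP state.
 */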
921f0089e39SRichard Lowe 
922f0089e39SRichard Lowe /*
923f0089e39SRichard Lowe  * Handle a processor extension error fault.
924f0089e39SRichard Lowe  * Returns a non-zero si_code for error.
925f0089e39SRichard Lowe  */
926f0089e39SRichard Lowe 
927f0089e39SRichard Lowe /*ARGSUSED*/
928f0089e39SRichard Lowe int
929f0089e39SRichard Lowe fpexterrflt(struct regs *rp)
930f0089e39SRichard Lowe {
931f0089e39SRichard Lowe 	uint32_t fpcw, fpsw;
932f0089e39SRichard Lowe 	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
933f0089e39SRichard Lowe 
934f0089e39SRichard Lowe 	ASSERT(fp_kind != FP_NO);
935f0089e39SRichard Lowe 
936f0089e39SRichard Lowe 	/*
937f0089e39SRichard Lowe 	 * Now we can enable the interrupts.
938f0089e39SRichard Lowe 	 * (NOTE: x87 fp exceptions come thru interrupt gate)
939f0089e39SRichard Lowe 	 */
940f0089e39SRichard Lowe 	sti();
941f0089e39SRichard Lowe 
942f0089e39SRichard Lowe 	if (!fpu_exists)
943f0089e39SRichard Lowe 		return (FPE_FLTINV);
944f0089e39SRichard Lowe 
945f0089e39SRichard Lowe 	/*
946f0089e39SRichard Lowe 	 * Do an unconditional save of the FP state.  If it's dirty (TS=0),
947f0089e39SRichard Lowe 	 * it'll be saved into the fpu context area passed in (that of the
948f0089e39SRichard Lowe 	 * current thread).  If it's not dirty (it may not be, due to
949f0089e39SRichard Lowe 	 * an intervening save caused by a context switch between the sti()
950f0089e39SRichard Lowe 	 * above and here), then it's safe to just use the stored values in
951f0089e39SRichard Lowe 	 * the context save area to determine the cause of the fault.
952f0089e39SRichard Lowe 	 */
953f0089e39SRichard Lowe 	fp_save(fp);
954f0089e39SRichard Lowe 
955f0089e39SRichard Lowe 	/* clear exception flags in saved state, as if by fnclex */
956f0089e39SRichard Lowe 	switch (fp_save_mech) {
957f0089e39SRichard Lowe 	case FP_FXSAVE:
958f0089e39SRichard Lowe 		fpsw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw;
959f0089e39SRichard Lowe 		fpcw = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fcw;
960f0089e39SRichard Lowe 		fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw &= ~FPS_SW_EFLAGS;
961f0089e39SRichard Lowe 		break;
962f0089e39SRichard Lowe 
963f0089e39SRichard Lowe 	case FP_XSAVE:
964f0089e39SRichard Lowe 		fpsw = fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fsw;
965f0089e39SRichard Lowe 		fpcw = fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fcw;
966f0089e39SRichard Lowe 		fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fsw &= ~FPS_SW_EFLAGS;
967f0089e39SRichard Lowe 		/*
968f0089e39SRichard Lowe 		 * Always set LEGACY_FP as it may have been cleared by the
969f0089e39SRichard Lowe 		 * XSAVE instruction.
970f0089e39SRichard Lowe 		 */
971*957246c9SPatrick Mooney 		fp->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |=
972*957246c9SPatrick Mooney 		    XFEATURE_LEGACY_FP;
973f0089e39SRichard Lowe 		break;
974f0089e39SRichard Lowe 	default:
975f0089e39SRichard Lowe 		panic("Invalid fp_save_mech");
976f0089e39SRichard Lowe 		/*NOTREACHED*/
977f0089e39SRichard Lowe 	}
978f0089e39SRichard Lowe 
979f0089e39SRichard Lowe 	fp->fpu_regs.kfpu_status = fpsw;
980f0089e39SRichard Lowe 
981f0089e39SRichard Lowe 	if ((fpsw & FPS_ES) == 0)
982f0089e39SRichard Lowe 		return (0);		/* No exception */
983f0089e39SRichard Lowe 
984f0089e39SRichard Lowe 	/*
985f0089e39SRichard Lowe 	 * "and" the exception flags with the complement of the mask
986f0089e39SRichard Lowe 	 * AND the exception flags with the complement of the mask
987f0089e39SRichard Lowe 	 * bits to determine which exception occurred.
988f0089e39SRichard Lowe 	return (fpe_sicode(fpsw & ~fpcw & 0x3f));
989f0089e39SRichard Lowe }
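
/*
 * Worked example for the return expression above, using the x87 layout
 * (status-word bits 0-5 are IE/DE/ZE/OE/UE/PE, and control-word bits 0-5
 * are the corresponding mask bits).  For an unmasked divide-by-zero:
 *
 *	fpsw & 0x3f == 0x04		(FPS_ZE pending)
 *	fpcw & 0x3f == 0x3b		(ZM clear, other exceptions masked)
 *	fpsw & ~fpcw & 0x3f == 0x04
 *
 * which fpe_sicode() below maps to FPE_FLTDIV.
 */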
990f0089e39SRichard Lowe 
991f0089e39SRichard Lowe /*
992f0089e39SRichard Lowe  * Handle an SSE/SSE2 precise exception.
993f0089e39SRichard Lowe  * Returns a non-zero si_code for error.
994f0089e39SRichard Lowe  */
995f0089e39SRichard Lowe /*ARGSUSED*/
996f0089e39SRichard Lowe int
997f0089e39SRichard Lowe fpsimderrflt(struct regs *rp)
998f0089e39SRichard Lowe {
999f0089e39SRichard Lowe 	uint32_t mxcsr, xmask;
1000f0089e39SRichard Lowe 	fpu_ctx_t *fp = &ttolwp(curthread)->lwp_pcb.pcb_fpu;
1001f0089e39SRichard Lowe 
1002f0089e39SRichard Lowe 	ASSERT(fp_kind & __FP_SSE);
1003f0089e39SRichard Lowe 
1004f0089e39SRichard Lowe 	/*
1005f0089e39SRichard Lowe 	 * NOTE: Interrupts are disabled during execution of this
1006f0089e39SRichard Lowe 	 * function.  They are enabled by the caller in trap.c.
1007f0089e39SRichard Lowe 	 */
1008f0089e39SRichard Lowe 
1009f0089e39SRichard Lowe 	/*
1010f0089e39SRichard Lowe 	 * If there is no FP unit, the only way we could have gotten here
1011f0089e39SRichard Lowe 	 * is via a user executing an INT $19 instruction, so there is
1012f0089e39SRichard Lowe 	 * no fault in that case.
1013f0089e39SRichard Lowe 	 */
1014f0089e39SRichard Lowe 	if (!fpu_exists)
1015f0089e39SRichard Lowe 		return (0);
1016f0089e39SRichard Lowe 
1017f0089e39SRichard Lowe 	/*
1018f0089e39SRichard Lowe 	 * Do an unconditional save of the FP state.  If it's dirty (TS=0),
1019f0089e39SRichard Lowe 	 * it'll be saved into the fpu context area passed in (that of the
1020f0089e39SRichard Lowe 	 * current thread).  If it's not dirty, then it's safe to just use
1021f0089e39SRichard Lowe 	 * the stored values in the context save area to determine the
1022f0089e39SRichard Lowe 	 * cause of the fault.
1023f0089e39SRichard Lowe 	 */
1024f0089e39SRichard Lowe 	fp_save(fp);		/* save the FPU state */
1025f0089e39SRichard Lowe 
1026f0089e39SRichard Lowe 	if (fp_save_mech == FP_XSAVE) {
1027f0089e39SRichard Lowe 		mxcsr = fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_mxcsr;
1028f0089e39SRichard Lowe 		fp->fpu_regs.kfpu_status =
1029f0089e39SRichard Lowe 		    fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave.fx_fsw;
1030f0089e39SRichard Lowe 	} else {
1031f0089e39SRichard Lowe 		mxcsr = fp->fpu_regs.kfpu_u.kfpu_fx->fx_mxcsr;
1032f0089e39SRichard Lowe 		fp->fpu_regs.kfpu_status = fp->fpu_regs.kfpu_u.kfpu_fx->fx_fsw;
1033f0089e39SRichard Lowe 	}
1034f0089e39SRichard Lowe 	fp->fpu_regs.kfpu_xstatus = mxcsr;
1035f0089e39SRichard Lowe 
1036f0089e39SRichard Lowe 	/*
1037f0089e39SRichard Lowe 	 * Compute the mask that determines which conditions can cause
1038f0089e39SRichard Lowe 	 * a #XM exception, and use it to filter the status bits so that
1039f0089e39SRichard Lowe 	 * we can identify the true cause of this one.
1040f0089e39SRichard Lowe 	 */
1041f0089e39SRichard Lowe 	xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS;
1042f0089e39SRichard Lowe 	return (fpe_simd_sicode((mxcsr & SSE_MXCSR_EFLAGS) & ~xmask));
1043f0089e39SRichard Lowe }
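
/*
 * Worked example for the MXCSR arithmetic above.  MXCSR bits 0-5 are the
 * exception flags and bits 7-12 the corresponding mask bits, so shifting
 * right by 7 lines the masks up with the flags.  For an unmasked SIMD
 * divide-by-zero:
 *
 *	mxcsr has ZE (bit 2, 0x0004) set and ZM (bit 9, 0x0200) clear
 *	xmask = (mxcsr >> 7) & SSE_MXCSR_EFLAGS	leaves bit 2 clear
 *	(mxcsr & SSE_MXCSR_EFLAGS) & ~xmask == 0x0004
 *
 * which fpe_simd_sicode() below maps to FPE_FLTDIV.
 */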
1044f0089e39SRichard Lowe 
1045f0089e39SRichard Lowe /*
1046f0089e39SRichard Lowe  * In the unlikely event that someone is relying on this subcode being
1047f0089e39SRichard Lowe  * FPE_FLTILL for denormalize exceptions, it can always be patched back
1048f0089e39SRichard Lowe  * again to restore old behaviour.
1049f0089e39SRichard Lowe  */
1050f0089e39SRichard Lowe int fpe_fltden = FPE_FLTDEN;
1051f0089e39SRichard Lowe 
1052f0089e39SRichard Lowe /*
1053f0089e39SRichard Lowe  * Map from the FPU status word to the FP exception si_code.
1054f0089e39SRichard Lowe  */
1055f0089e39SRichard Lowe static int
1056f0089e39SRichard Lowe fpe_sicode(uint_t sw)
1057f0089e39SRichard Lowe {
1058f0089e39SRichard Lowe 	if (sw & FPS_IE)
1059f0089e39SRichard Lowe 		return (FPE_FLTINV);
1060f0089e39SRichard Lowe 	if (sw & FPS_ZE)
1061f0089e39SRichard Lowe 		return (FPE_FLTDIV);
1062f0089e39SRichard Lowe 	if (sw & FPS_DE)
1063f0089e39SRichard Lowe 		return (fpe_fltden);
1064f0089e39SRichard Lowe 	if (sw & FPS_OE)
1065f0089e39SRichard Lowe 		return (FPE_FLTOVF);
1066f0089e39SRichard Lowe 	if (sw & FPS_UE)
1067f0089e39SRichard Lowe 		return (FPE_FLTUND);
1068f0089e39SRichard Lowe 	if (sw & FPS_PE)
1069f0089e39SRichard Lowe 		return (FPE_FLTRES);
1070f0089e39SRichard Lowe 	return (FPE_FLTINV);	/* default si_code for other exceptions */
1071f0089e39SRichard Lowe }
1072f0089e39SRichard Lowe 
1073f0089e39SRichard Lowe /*
1074f0089e39SRichard Lowe  * Map from the SSE status word to the FP exception si_code.
1075f0089e39SRichard Lowe  */
1076f0089e39SRichard Lowe static int
1077f0089e39SRichard Lowe fpe_simd_sicode(uint_t sw)
1078f0089e39SRichard Lowe {
1079f0089e39SRichard Lowe 	if (sw & SSE_IE)
1080f0089e39SRichard Lowe 		return (FPE_FLTINV);
1081f0089e39SRichard Lowe 	if (sw & SSE_ZE)
1082f0089e39SRichard Lowe 		return (FPE_FLTDIV);
1083f0089e39SRichard Lowe 	if (sw & SSE_DE)
1084f0089e39SRichard Lowe 		return (FPE_FLTDEN);
1085f0089e39SRichard Lowe 	if (sw & SSE_OE)
1086f0089e39SRichard Lowe 		return (FPE_FLTOVF);
1087f0089e39SRichard Lowe 	if (sw & SSE_UE)
1088f0089e39SRichard Lowe 		return (FPE_FLTUND);
1089f0089e39SRichard Lowe 	if (sw & SSE_PE)
1090f0089e39SRichard Lowe 		return (FPE_FLTRES);
1091f0089e39SRichard Lowe 	return (FPE_FLTINV);	/* default si_code for other exceptions */
1092f0089e39SRichard Lowe }
1093f0089e39SRichard Lowe 
1094f0089e39SRichard Lowe /*
1095f0089e39SRichard Lowe  * This routine is invoked as part of libc's __fpstart implementation
1096f0089e39SRichard Lowe  * via sysi86(2).
1097f0089e39SRichard Lowe  *
1098f0089e39SRichard Lowe  * It may be called -before- any context has been assigned, in which case
1099f0089e39SRichard Lowe  * we try to avoid touching the hardware.  Or it may be invoked well
1100f0089e39SRichard Lowe  * after the context has been assigned and fiddled with, in which case
1101f0089e39SRichard Lowe  * just tweak it directly.
1102f0089e39SRichard Lowe  */
1103f0089e39SRichard Lowe void
1104f0089e39SRichard Lowe fpsetcw(uint16_t fcw, uint32_t mxcsr)
1105f0089e39SRichard Lowe {
1106f0089e39SRichard Lowe 	struct fpu_ctx *fp = &curthread->t_lwp->lwp_pcb.pcb_fpu;
1107f0089e39SRichard Lowe 	struct fxsave_state *fx;
1108f0089e39SRichard Lowe 
1109f0089e39SRichard Lowe 	if (!fpu_exists || fp_kind == FP_NO)
1110f0089e39SRichard Lowe 		return;
1111f0089e39SRichard Lowe 
1112f0089e39SRichard Lowe 	if ((fp->fpu_flags & FPU_EN) == 0) {
1113f0089e39SRichard Lowe 		if (fcw == FPU_CW_INIT && mxcsr == SSE_MXCSR_INIT) {
1114f0089e39SRichard Lowe 			/*
1115f0089e39SRichard Lowe 			 * Common case.  Floating point unit not yet
1116f0089e39SRichard Lowe 			 * enabled, and kernel already intends to initialize
1117f0089e39SRichard Lowe 			 * the hardware the way the caller wants.
1118f0089e39SRichard Lowe 			 */
1119f0089e39SRichard Lowe 			return;
1120f0089e39SRichard Lowe 		}
1121f0089e39SRichard Lowe 		/*
1122f0089e39SRichard Lowe 		 * Hmm.  Userland wants a different default.
1123f0089e39SRichard Lowe 		 * Do a fake "first trap" to establish the context, then
1124f0089e39SRichard Lowe 		 * handle as if we already had a context before we came in.
1125f0089e39SRichard Lowe 		 */
1126f0089e39SRichard Lowe 		kpreempt_disable();
1127f0089e39SRichard Lowe 		fp_seed();
1128f0089e39SRichard Lowe 		kpreempt_enable();
1129f0089e39SRichard Lowe 	}
1130f0089e39SRichard Lowe 
1131f0089e39SRichard Lowe 	/*
1132f0089e39SRichard Lowe 	 * Ensure that the current hardware state is flushed back to the
1133f0089e39SRichard Lowe 	 * pcb, then modify that copy.  Next use of the fp will
1134f0089e39SRichard Lowe 	 * restore the context.
1135f0089e39SRichard Lowe 	 */
1136f0089e39SRichard Lowe 	fp_save(fp);
1137f0089e39SRichard Lowe 
1138f0089e39SRichard Lowe 	switch (fp_save_mech) {
1139f0089e39SRichard Lowe 	case FP_FXSAVE:
1140f0089e39SRichard Lowe 		fx = fp->fpu_regs.kfpu_u.kfpu_fx;
1141f0089e39SRichard Lowe 		fx->fx_fcw = fcw;
1142f0089e39SRichard Lowe 		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
1143f0089e39SRichard Lowe 		break;
1144f0089e39SRichard Lowe 
1145f0089e39SRichard Lowe 	case FP_XSAVE:
1146f0089e39SRichard Lowe 		fx = &fp->fpu_regs.kfpu_u.kfpu_xs->xs_fxsave;
1147f0089e39SRichard Lowe 		fx->fx_fcw = fcw;
1148f0089e39SRichard Lowe 		fx->fx_mxcsr = sse_mxcsr_mask & mxcsr;
1149f0089e39SRichard Lowe 		/*
1150f0089e39SRichard Lowe 		 * Always set LEGACY_FP as it may have been cleared by the
1151f0089e39SRichard Lowe 		 * XSAVE instruction.
1152f0089e39SRichard Lowe 		 */
1153*957246c9SPatrick Mooney 		fp->fpu_regs.kfpu_u.kfpu_xs->xs_header.xsh_xstate_bv |=
1154*957246c9SPatrick Mooney 		    XFEATURE_LEGACY_FP;
1155f0089e39SRichard Lowe 		break;
1156f0089e39SRichard Lowe 	default:
1157f0089e39SRichard Lowe 		panic("Invalid fp_save_mech");
1158f0089e39SRichard Lowe 		/*NOTREACHED*/
1159f0089e39SRichard Lowe 	}
1160f0089e39SRichard Lowe }
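
/*
 * Hypothetical userland sketch of the path described above.  The
 * SI86FPSTART command name and argument order are assumptions here;
 * consult sysi86(2) and libc's __fpstart sources for the real interface:
 *
 *	#include <sys/sysi86.h>
 *
 *	(void) sysi86(SI86FPSTART, fcw, mxcsr);
 */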
1161f0089e39SRichard Lowe 
1162f0089e39SRichard Lowe static void
1163f0089e39SRichard Lowe kernel_fpu_fpstate_init(kfpu_state_t *kfpu)
1164f0089e39SRichard Lowe {
1165f0089e39SRichard Lowe 	struct xsave_state *xs;
1166f0089e39SRichard Lowe 
1167f0089e39SRichard Lowe 	switch (fp_save_mech) {
1168f0089e39SRichard Lowe 	case FP_FXSAVE:
1169f0089e39SRichard Lowe 		bcopy(&sse_initial, kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_fx,
1170f0089e39SRichard Lowe 		    sizeof (struct fxsave_state));
1171f0089e39SRichard Lowe 		kfpu->kfpu_ctx.fpu_xsave_mask = 0;
1172f0089e39SRichard Lowe 		break;
1173f0089e39SRichard Lowe 	case FP_XSAVE:
1174f0089e39SRichard Lowe 		xs = kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_xs;
1175f0089e39SRichard Lowe 		bzero(xs, cpuid_get_xsave_size());
1176f0089e39SRichard Lowe 		bcopy(&avx_initial, xs, sizeof (*xs));
1177*957246c9SPatrick Mooney 		xs->xs_header.xsh_xstate_bv = XFEATURE_LEGACY_FP | XFEATURE_SSE;
1178f0089e39SRichard Lowe 		kfpu->kfpu_ctx.fpu_xsave_mask = XFEATURE_FP_ALL;
1179f0089e39SRichard Lowe 		break;
1180f0089e39SRichard Lowe 	default:
1181f0089e39SRichard Lowe 		panic("Invalid fp_save_mech");
1182f0089e39SRichard Lowe 	}
1183f0089e39SRichard Lowe 
1184f0089e39SRichard Lowe 	/*
1185f0089e39SRichard Lowe 	 * Set the corresponding flags that the system expects on the FPU state
1186f0089e39SRichard Lowe 	 * to indicate that this is our state. The FPU_EN flag is required to
1187f0089e39SRichard Lowe 	 * indicate that FPU usage is allowed. The FPU_KERN flag is explicitly
1188f0089e39SRichard Lowe 	 * not set below as it represents that this state is being suppressed
1189f0089e39SRichard Lowe 	 * by the kernel.
1190f0089e39SRichard Lowe 	 */
1191f0089e39SRichard Lowe 	kfpu->kfpu_ctx.fpu_flags = FPU_EN | FPU_VALID;
1192f0089e39SRichard Lowe 	kfpu->kfpu_flags |= KFPU_F_INITIALIZED;
1193f0089e39SRichard Lowe }
1194f0089e39SRichard Lowe 
1195f0089e39SRichard Lowe kfpu_state_t *
1196f0089e39SRichard Lowe kernel_fpu_alloc(int kmflags)
1197f0089e39SRichard Lowe {
1198f0089e39SRichard Lowe 	kfpu_state_t *kfpu;
1199f0089e39SRichard Lowe 
1200f0089e39SRichard Lowe 	if ((kfpu = kmem_zalloc(sizeof (kfpu_state_t), kmflags)) == NULL) {
1201f0089e39SRichard Lowe 		return (NULL);
1202f0089e39SRichard Lowe 	}
1203f0089e39SRichard Lowe 
1204f0089e39SRichard Lowe 	kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_generic =
1205f0089e39SRichard Lowe 	    kmem_cache_alloc(fpsave_cachep, kmflags);
1206f0089e39SRichard Lowe 	if (kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_generic == NULL) {
1207f0089e39SRichard Lowe 		kmem_free(kfpu, sizeof (kfpu_state_t));
1208f0089e39SRichard Lowe 		return (NULL);
1209f0089e39SRichard Lowe 	}
1210f0089e39SRichard Lowe 
1211f0089e39SRichard Lowe 	kernel_fpu_fpstate_init(kfpu);
1212f0089e39SRichard Lowe 
1213f0089e39SRichard Lowe 	return (kfpu);
1214f0089e39SRichard Lowe }
1215f0089e39SRichard Lowe 
1216f0089e39SRichard Lowe void
1217f0089e39SRichard Lowe kernel_fpu_free(kfpu_state_t *kfpu)
1218f0089e39SRichard Lowe {
1219f0089e39SRichard Lowe 	kmem_cache_free(fpsave_cachep,
1220f0089e39SRichard Lowe 	    kfpu->kfpu_ctx.fpu_regs.kfpu_u.kfpu_generic);
1221f0089e39SRichard Lowe 	kmem_free(kfpu, sizeof (kfpu_state_t));
1222f0089e39SRichard Lowe }
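
/*
 * A minimal lifecycle sketch for the two routines above, assuming flags
 * of 0 to kernel_fpu_begin()/kernel_fpu_end() select the
 * preallocated-state path handled below:
 *
 *	kfpu_state_t *kfpu = kernel_fpu_alloc(KM_SLEEP);
 *	kernel_fpu_begin(kfpu, 0);
 *	(FPU-using kernel code)
 *	kernel_fpu_end(kfpu, 0);
 *	kernel_fpu_free(kfpu);
 */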
1223f0089e39SRichard Lowe 
1224f0089e39SRichard Lowe static void
1225f0089e39SRichard Lowe kernel_fpu_ctx_save(void *arg)
1226f0089e39SRichard Lowe {
1227f0089e39SRichard Lowe 	kfpu_state_t *kfpu = arg;
1228f0089e39SRichard Lowe 	fpu_ctx_t *pf;
1229f0089e39SRichard Lowe 
1230f0089e39SRichard Lowe 	if (kfpu == NULL) {
1231f0089e39SRichard Lowe 		/*
1232f0089e39SRichard Lowe 		 * A NULL kfpu implies this is a kernel thread with an LWP and
1233f0089e39SRichard Lowe 		 * no user-level FPU usage. Use the lwp fpu save area.
1234f0089e39SRichard Lowe 		 */
1235f0089e39SRichard Lowe 		pf = &curthread->t_lwp->lwp_pcb.pcb_fpu;
1236f0089e39SRichard Lowe 
1237f0089e39SRichard Lowe 		ASSERT(curthread->t_procp->p_flag & SSYS);
1238f0089e39SRichard Lowe 		ASSERT3U(pf->fpu_flags & FPU_VALID, ==, 0);
1239f0089e39SRichard Lowe 
1240f0089e39SRichard Lowe 		fp_save(pf);
1241f0089e39SRichard Lowe 	} else {
1242f0089e39SRichard Lowe 		pf = &kfpu->kfpu_ctx;
1243f0089e39SRichard Lowe 
1244f0089e39SRichard Lowe 		ASSERT3P(kfpu->kfpu_curthread, ==, curthread);
1245f0089e39SRichard Lowe 		ASSERT3U(pf->fpu_flags & FPU_VALID, ==, 0);
1246f0089e39SRichard Lowe 
1247f0089e39SRichard Lowe 		/*
1248f0089e39SRichard Lowe 		 * Note, we can't use fp_save because it assumes that we're
1249f0089e39SRichard Lowe 		 * saving to the thread's PCB and not somewhere else. Because
1250f0089e39SRichard Lowe 		 * this is a different FPU context, we instead have to do this
1251f0089e39SRichard Lowe 		 * ourselves.
1252f0089e39SRichard Lowe 		 */
1253f0089e39SRichard Lowe 		switch (fp_save_mech) {
1254f0089e39SRichard Lowe 		case FP_FXSAVE:
1255f0089e39SRichard Lowe 			fpxsave(pf->fpu_regs.kfpu_u.kfpu_fx);
1256f0089e39SRichard Lowe 			break;
1257f0089e39SRichard Lowe 		case FP_XSAVE:
1258f0089e39SRichard Lowe 			xsavep(pf->fpu_regs.kfpu_u.kfpu_xs, pf->fpu_xsave_mask);
1259f0089e39SRichard Lowe 			break;
1260f0089e39SRichard Lowe 		default:
1261f0089e39SRichard Lowe 			panic("Invalid fp_save_mech");
1262f0089e39SRichard Lowe 		}
1263f0089e39SRichard Lowe 
1264f0089e39SRichard Lowe 		/*
1265f0089e39SRichard Lowe 		 * Because we have saved context here, our save state is no
1266f0089e39SRichard Lowe 		 * longer valid and therefore needs to be reinitialized.
1267f0089e39SRichard Lowe 		 */
1268f0089e39SRichard Lowe 		kfpu->kfpu_flags &= ~KFPU_F_INITIALIZED;
1269f0089e39SRichard Lowe 	}
1270f0089e39SRichard Lowe 
1271f0089e39SRichard Lowe 	pf->fpu_flags |= FPU_VALID;
1272f0089e39SRichard Lowe 
1273f0089e39SRichard Lowe 	/*
1274f0089e39SRichard Lowe 	 * Clear KFPU flag. This allows swtch to check for improper kernel
1275f0089e39SRichard Lowe 	 * usage of the FPU (i.e. switching to a new thread while the old
1276f0089e39SRichard Lowe 	 * thread was in the kernel and using the FPU, but did not perform a
1277f0089e39SRichard Lowe 	 * context save).
1278f0089e39SRichard Lowe 	 */
1279f0089e39SRichard Lowe 	curthread->t_flag &= ~T_KFPU;
1280f0089e39SRichard Lowe }
1281f0089e39SRichard Lowe 
1282f0089e39SRichard Lowe static void
1283f0089e39SRichard Lowe kernel_fpu_ctx_restore(void *arg)
1284f0089e39SRichard Lowe {
1285f0089e39SRichard Lowe 	kfpu_state_t *kfpu = arg;
1286f0089e39SRichard Lowe 	fpu_ctx_t *pf;
1287f0089e39SRichard Lowe 
1288f0089e39SRichard Lowe 	if (kfpu == NULL) {
1289f0089e39SRichard Lowe 		/*
1290f0089e39SRichard Lowe 		 * A NULL kfpu implies this is a kernel thread with an LWP and
1291f0089e39SRichard Lowe 		 * no user-level FPU usage. Use the lwp fpu save area.
1292f0089e39SRichard Lowe 		 */
1293f0089e39SRichard Lowe 		pf = &curthread->t_lwp->lwp_pcb.pcb_fpu;
1294f0089e39SRichard Lowe 
1295f0089e39SRichard Lowe 		ASSERT(curthread->t_procp->p_flag & SSYS);
1296f0089e39SRichard Lowe 		ASSERT3U(pf->fpu_flags & FPU_VALID, !=, 0);
1297f0089e39SRichard Lowe 	} else {
1298f0089e39SRichard Lowe 		pf = &kfpu->kfpu_ctx;
1299f0089e39SRichard Lowe 
1300f0089e39SRichard Lowe 		ASSERT3P(kfpu->kfpu_curthread, ==, curthread);
1301f0089e39SRichard Lowe 		ASSERT3U(pf->fpu_flags & FPU_VALID, !=, 0);
1302f0089e39SRichard Lowe 	}
1303f0089e39SRichard Lowe 
1304f0089e39SRichard Lowe 	fp_restore(pf);
1305f0089e39SRichard Lowe 	curthread->t_flag |= T_KFPU;
1306f0089e39SRichard Lowe }
1307f0089e39SRichard Lowe 
1308f0089e39SRichard Lowe /*
1309f0089e39SRichard Lowe  * Validate that the thread is not switching off-cpu while actively using the
1310f0089e39SRichard Lowe  * FPU within the kernel.
1311f0089e39SRichard Lowe  */
1312f0089e39SRichard Lowe void
1313f0089e39SRichard Lowe kernel_fpu_no_swtch(void)
1314f0089e39SRichard Lowe {
1315f0089e39SRichard Lowe 	if ((curthread->t_flag & T_KFPU) != 0) {
1316f0089e39SRichard Lowe 		panic("curthread swtch-ing while the kernel is using the FPU");
1317f0089e39SRichard Lowe 	}
1318f0089e39SRichard Lowe }
1319f0089e39SRichard Lowe 
13205a469116SPatrick Mooney static const struct ctxop_template kfpu_ctxop_tpl = {
13215a469116SPatrick Mooney 	.ct_rev		= CTXOP_TPL_REV,
13225a469116SPatrick Mooney 	.ct_save	= kernel_fpu_ctx_save,
13235a469116SPatrick Mooney 	.ct_restore	= kernel_fpu_ctx_restore,
13245a469116SPatrick Mooney };
13255a469116SPatrick Mooney 
1326f0089e39SRichard Lowe void
1327f0089e39SRichard Lowe kernel_fpu_begin(kfpu_state_t *kfpu, uint_t flags)
1328f0089e39SRichard Lowe {
1329f0089e39SRichard Lowe 	klwp_t *pl = curthread->t_lwp;
1330f0089e39SRichard Lowe 	struct ctxop *ctx;
1331f0089e39SRichard Lowe 
1332f0089e39SRichard Lowe 	if ((curthread->t_flag & T_KFPU) != 0) {
1333f0089e39SRichard Lowe 		panic("curthread attempting to nest kernel FPU states");
1334f0089e39SRichard Lowe 	}
1335f0089e39SRichard Lowe 
1336f0089e39SRichard Lowe 	/* KFPU_USE_LWP and KFPU_NO_STATE are mutually exclusive. */
1337f0089e39SRichard Lowe 	ASSERT((flags & (KFPU_USE_LWP | KFPU_NO_STATE)) !=
1338f0089e39SRichard Lowe 	    (KFPU_USE_LWP | KFPU_NO_STATE));
1339f0089e39SRichard Lowe 
1340f0089e39SRichard Lowe 	if ((flags & KFPU_NO_STATE) == KFPU_NO_STATE) {
1341f0089e39SRichard Lowe 		/*
1342f0089e39SRichard Lowe 		 * Since we don't have a kfpu_state or usable lwp pcb_fpu to
1343f0089e39SRichard Lowe 		 * hold our kernel FPU context, we depend on the caller doing
1344f0089e39SRichard Lowe 		 * kpreempt_disable for the duration of our FPU usage. This
1345f0089e39SRichard Lowe 		 * should only be done for very short periods of time.
1346f0089e39SRichard Lowe 		 */
1347f0089e39SRichard Lowe 		ASSERT(curthread->t_preempt > 0);
1348f0089e39SRichard Lowe 		ASSERT(kfpu == NULL);
1349f0089e39SRichard Lowe 
1350f0089e39SRichard Lowe 		if (pl != NULL) {
1351f0089e39SRichard Lowe 			/*
1352f0089e39SRichard Lowe 			 * We might have already saved once so FPU_VALID could
1353f0089e39SRichard Lowe 			 * be set. This is handled in fp_save.
1354f0089e39SRichard Lowe 			 */
1355f0089e39SRichard Lowe 			fp_save(&pl->lwp_pcb.pcb_fpu);
1356f0089e39SRichard Lowe 			pl->lwp_pcb.pcb_fpu.fpu_flags |= FPU_KERNEL;
1357f0089e39SRichard Lowe 		}
1358f0089e39SRichard Lowe 
1359f0089e39SRichard Lowe 		curthread->t_flag |= T_KFPU;
1360f0089e39SRichard Lowe 
1361f0089e39SRichard Lowe 		/* Always restore the fpu to the initial state. */
1362f0089e39SRichard Lowe 		fpinit();
1363f0089e39SRichard Lowe 
1364f0089e39SRichard Lowe 		return;
1365f0089e39SRichard Lowe 	}
1366f0089e39SRichard Lowe 
1367f0089e39SRichard Lowe 	/*
1368f0089e39SRichard Lowe 	 * We either have a kfpu, or are using the LWP pcb_fpu for context ops.
1369f0089e39SRichard Lowe 	 */
1370f0089e39SRichard Lowe 
1371f0089e39SRichard Lowe 	if ((flags & KFPU_USE_LWP) == 0) {
1372f0089e39SRichard Lowe 		if (kfpu->kfpu_curthread != NULL)
1373f0089e39SRichard Lowe 			panic("attempting to reuse kernel FPU state at %p when "
1374f0089e39SRichard Lowe 			    "another thread is already using it", kfpu);
1375f0089e39SRichard Lowe 
1376f0089e39SRichard Lowe 		if ((kfpu->kfpu_flags & KFPU_F_INITIALIZED) == 0)
1377f0089e39SRichard Lowe 			kernel_fpu_fpstate_init(kfpu);
1378f0089e39SRichard Lowe 
1379f0089e39SRichard Lowe 		kfpu->kfpu_curthread = curthread;
1380f0089e39SRichard Lowe 	}
1381f0089e39SRichard Lowe 
1382f0089e39SRichard Lowe 	/*
1383f0089e39SRichard Lowe 	 * Not all threads may have an active LWP. If they do and we're not
1384f0089e39SRichard Lowe 	 * going to re-use the LWP, then we should go ahead and save the state.
1385f0089e39SRichard Lowe 	 * We must also note that the fpu is now being used by the kernel and
1386f0089e39SRichard Lowe 	 * therefore we do not want to manage the fpu state via the user-level
1387f0089e39SRichard Lowe 	 * thread's context handlers.
1388f0089e39SRichard Lowe 	 *
1389f0089e39SRichard Lowe 	 * We might have already saved once (due to a prior use of the kernel
1390f0089e39SRichard Lowe 	 * FPU or another code path) so FPU_VALID could be set. This is handled
1391f0089e39SRichard Lowe 	 * by fp_save, as is the FPU_EN check.
1392f0089e39SRichard Lowe 	 */
13935a469116SPatrick Mooney 	ctx = ctxop_allocate(&kfpu_ctxop_tpl, kfpu);
1394f0089e39SRichard Lowe 	kpreempt_disable();
1395f0089e39SRichard Lowe 	if (pl != NULL) {
1396f0089e39SRichard Lowe 		if ((flags & KFPU_USE_LWP) == 0)
1397f0089e39SRichard Lowe 			fp_save(&pl->lwp_pcb.pcb_fpu);
1398f0089e39SRichard Lowe 		pl->lwp_pcb.pcb_fpu.fpu_flags |= FPU_KERNEL;
1399f0089e39SRichard Lowe 	}
1400f0089e39SRichard Lowe 
1401f0089e39SRichard Lowe 	/*
14025a469116SPatrick Mooney 	 * Set the context operations for kernel FPU usage.  Because kernel FPU
14035a469116SPatrick Mooney 	 * setup and ctxop attachment need to happen under the protection of
14045a469116SPatrick Mooney 	 * kpreempt_disable(), we allocate the ctxop outside the guard so its
14055a469116SPatrick Mooney 	 * sleeping allocation will not cause a voluntary swtch().  This allows
14065a469116SPatrick Mooney 	 * the rest of the initialization to proceed, ensuring valid state for
14075a469116SPatrick Mooney 	 * the ctxop handlers.
1408f0089e39SRichard Lowe 	 */
14095a469116SPatrick Mooney 	ctxop_attach(curthread, ctx);
1410f0089e39SRichard Lowe 	curthread->t_flag |= T_KFPU;
1411f0089e39SRichard Lowe 
1412f0089e39SRichard Lowe 	if ((flags & KFPU_USE_LWP) == KFPU_USE_LWP) {
1413f0089e39SRichard Lowe 		/*
1414f0089e39SRichard Lowe 		 * For pure kernel threads with an LWP, we can use the LWP's
1415f0089e39SRichard Lowe 		 * pcb_fpu to save/restore context.
1416f0089e39SRichard Lowe 		 */
1417f0089e39SRichard Lowe 		fpu_ctx_t *pf = &pl->lwp_pcb.pcb_fpu;
1418f0089e39SRichard Lowe 
1419f0089e39SRichard Lowe 		VERIFY(curthread->t_procp->p_flag & SSYS);
1420f0089e39SRichard Lowe 		VERIFY(kfpu == NULL);
1421f0089e39SRichard Lowe 		ASSERT((pf->fpu_flags & FPU_EN) == 0);
1422f0089e39SRichard Lowe 
1423f0089e39SRichard Lowe 		/* Always restore the fpu to the initial state. */
1424f0089e39SRichard Lowe 		if (fp_save_mech == FP_XSAVE)
1425f0089e39SRichard Lowe 			pf->fpu_xsave_mask = XFEATURE_FP_ALL;
1426f0089e39SRichard Lowe 		fpinit();
1427f0089e39SRichard Lowe 		pf->fpu_flags = FPU_EN | FPU_KERNEL;
1428f0089e39SRichard Lowe 	} else {
1429f0089e39SRichard Lowe 		/* initialize the kfpu state */
1430f0089e39SRichard Lowe 		kernel_fpu_ctx_restore(kfpu);
1431f0089e39SRichard Lowe 	}
1432f0089e39SRichard Lowe 	kpreempt_enable();
1433f0089e39SRichard Lowe }
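
/*
 * Sketch of the KFPU_NO_STATE contract asserted above: the caller owns
 * preemption for the entire FPU region, e.g.
 *
 *	kpreempt_disable();
 *	kernel_fpu_begin(NULL, KFPU_NO_STATE);
 *	(short FPU-using sequence)
 *	kernel_fpu_end(NULL, KFPU_NO_STATE);
 *	kpreempt_enable();
 */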
1434f0089e39SRichard Lowe 
1435f0089e39SRichard Lowe void
1436f0089e39SRichard Lowe kernel_fpu_end(kfpu_state_t *kfpu, uint_t flags)
1437f0089e39SRichard Lowe {
1438f0089e39SRichard Lowe 	if ((curthread->t_flag & T_KFPU) == 0) {
1439f0089e39SRichard Lowe 		panic("curthread attempting to clear kernel FPU state "
1440f0089e39SRichard Lowe 		    "without using it");
1441f0089e39SRichard Lowe 	}
1442f0089e39SRichard Lowe 
1443f0089e39SRichard Lowe 	/*
1444f0089e39SRichard Lowe 	 * General comments on why the rest of this function is structured the
1445f0089e39SRichard Lowe 	 * way it is. Be aware that there is a lot of subtlety here.
1446f0089e39SRichard Lowe 	 *
1447f0089e39SRichard Lowe 	 * If a user-level thread ever uses the fpu while in the kernel, then
1448f0089e39SRichard Lowe 	 * we cannot call fpdisable since that does STTS. That will set the
1449f0089e39SRichard Lowe 	 * ts bit in %cr0 which will cause an exception if anything touches the
1450f0089e39SRichard Lowe 	 * fpu. However, the user-level context switch handler (fpsave_ctxt)
1451f0089e39SRichard Lowe 	 * needs to access the fpu to save the registers into the pcb.
1452f0089e39SRichard Lowe 	 * fpsave_ctxt relies on fprestore_ctxt having done CLTS to clear the
1453f0089e39SRichard Lowe 	 * ts bit when the thread context switched onto the CPU.
1454f0089e39SRichard Lowe 	 *
1455f0089e39SRichard Lowe 	 * Calling fpdisable only affects the current CPU's %cr0 register.
1456f0089e39SRichard Lowe 	 *
14575a469116SPatrick Mooney 	 * During ctxop_remove and kpreempt_enable, we can voluntarily context
1458f0089e39SRichard Lowe 	 * switch, so the CPU we were on when we entered this function might
14595a469116SPatrick Mooney 	 * not be the same one we're on when we return from ctxop_remove or end
1460f0089e39SRichard Lowe 	 * the function. Note there can be user-level context switch handlers
1461f0089e39SRichard Lowe 	 * still installed if this is a user-level thread.
1462f0089e39SRichard Lowe 	 *
1463f0089e39SRichard Lowe 	 * We also must be careful in the unlikely chance we're running in an
1464f0089e39SRichard Lowe 	 * interrupt thread, since we can't leave the CPU's %cr0 TS state set
1465f0089e39SRichard Lowe 	 * incorrectly for the "real" thread to resume on this CPU.
1466f0089e39SRichard Lowe 	 */
1467f0089e39SRichard Lowe 
1468f0089e39SRichard Lowe 	if ((flags & KFPU_NO_STATE) == 0) {
1469f0089e39SRichard Lowe 		kpreempt_disable();
1470f0089e39SRichard Lowe 	} else {
1471f0089e39SRichard Lowe 		ASSERT(curthread->t_preempt > 0);
1472f0089e39SRichard Lowe 	}
1473f0089e39SRichard Lowe 
1474f0089e39SRichard Lowe 	curthread->t_flag &= ~T_KFPU;
1475f0089e39SRichard Lowe 
1476f0089e39SRichard Lowe 	/*
1477f0089e39SRichard Lowe 	 * When we are ending things, we explicitly don't save the current
1478f0089e39SRichard Lowe 	 * kernel FPU state back to the temporary state. The kfpu API is not
1479f0089e39SRichard Lowe 	 * intended to be a permanent save location.
1480f0089e39SRichard Lowe 	 *
1481f0089e39SRichard Lowe 	 * If this is a user-level thread and we were to context switch
1482f0089e39SRichard Lowe 	 * before returning to user-land, fpsave_ctxt will be a no-op since we
1483f0089e39SRichard Lowe 	 * already saved the user-level FPU state the first time we run
1484f0089e39SRichard Lowe 	 * kernel_fpu_begin (i.e. we won't save the bad kernel fpu state over
1485f0089e39SRichard Lowe 	 * the user-level fpu state). The fpsave_ctxt functions only save if
1486f0089e39SRichard Lowe 	 * FPU_VALID is not already set. fp_save also sets PCB_SET_UPDATE_FPU so
1487f0089e39SRichard Lowe 	 * fprestore_ctxt will be done in sys_rtt_common when the thread
1488f0089e39SRichard Lowe 	 * finally returns to user-land.
1489f0089e39SRichard Lowe 	 */
1490f0089e39SRichard Lowe 
1491f0089e39SRichard Lowe 	if ((curthread->t_procp->p_flag & SSYS) != 0 &&
1492f0089e39SRichard Lowe 	    curthread->t_intr == NULL) {
1493f0089e39SRichard Lowe 		/*
1494f0089e39SRichard Lowe 		 * This is a kernel thread, not an interrupt thread, so we
1495f0089e39SRichard Lowe 		 * STTS now.
1496f0089e39SRichard Lowe 		 */
1497f0089e39SRichard Lowe 		fpdisable();
1498f0089e39SRichard Lowe 	}
1499f0089e39SRichard Lowe 
1500f0089e39SRichard Lowe 	if ((flags & KFPU_NO_STATE) == 0) {
15015a469116SPatrick Mooney 		ctxop_remove(curthread, &kfpu_ctxop_tpl, kfpu);
1502f0089e39SRichard Lowe 
1503f0089e39SRichard Lowe 		if (kfpu != NULL) {
1504f0089e39SRichard Lowe 			if (kfpu->kfpu_curthread != curthread) {
1505f0089e39SRichard Lowe 				panic("attempting to end kernel FPU state "
1506f0089e39SRichard Lowe 				    "for %p, but active thread is not "
1507f0089e39SRichard Lowe 				    "curthread", kfpu);
1508f0089e39SRichard Lowe 			} else {
1509f0089e39SRichard Lowe 				kfpu->kfpu_curthread = NULL;
1510f0089e39SRichard Lowe 			}
1511f0089e39SRichard Lowe 		}
1512f0089e39SRichard Lowe 
1513f0089e39SRichard Lowe 		kpreempt_enable();
1514f0089e39SRichard Lowe 	}
1515f0089e39SRichard Lowe 
1516f0089e39SRichard Lowe 	if (curthread->t_lwp != NULL) {
1517f0089e39SRichard Lowe 		uint_t f;
1518f0089e39SRichard Lowe 
1519f0089e39SRichard Lowe 		if (flags & KFPU_USE_LWP) {
1520f0089e39SRichard Lowe 			f = FPU_EN | FPU_KERNEL;
1521f0089e39SRichard Lowe 		} else {
1522f0089e39SRichard Lowe 			f = FPU_KERNEL;
1523f0089e39SRichard Lowe 		}
1524f0089e39SRichard Lowe 		curthread->t_lwp->lwp_pcb.pcb_fpu.fpu_flags &= ~f;
1525f0089e39SRichard Lowe 	}
1526f0089e39SRichard Lowe }
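
/*
 * And a sketch of the KFPU_USE_LWP variant verified in kernel_fpu_begin():
 * only a system (SSYS) thread with an LWP may use it, and no kfpu_state_t
 * is supplied:
 *
 *	kernel_fpu_begin(NULL, KFPU_USE_LWP);
 *	(FPU-using kernel code)
 *	kernel_fpu_end(NULL, KFPU_USE_LWP);
 */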
1527