xref: /illumos-gate/usr/src/uts/intel/sys/fp.h (revision 75840da35ecec00345f7f5f5d85a1f19fae4bd26)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
23  * Copyright (c) 2018, Joyent, Inc.
24  * Copyright 2023 Oxide Computer Company
25  * Copyright 2025 Edgecast Cloud LLC.
26  *
27  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
28  */
29 
30 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
31 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
32 /*		All Rights Reserved				*/
33 
34 #ifndef _SYS_FP_H
35 #define	_SYS_FP_H
36 
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 
41 /*
42  * 80287/80387 and SSE/SSE2 floating point processor definitions
43  */
44 
45 /*
46  * values that go into fp_kind
47  */
48 #define	FP_NO	0	/* no fp chip, no emulator (no fp support)	*/
49 #define	FP_SW	1	/* no fp chip, using software emulator		*/
50 #define	FP_HW	2	/* chip present bit				*/
51 #define	FP_287	2	/* 80287 chip present				*/
52 #define	FP_387	3	/* 80387 chip present				*/
53 #define	FP_487	6	/* 80487 chip present				*/
54 #define	FP_486	6	/* 80486 chip present				*/
55 /*
56  * The following values are bit flags instead of actual values.
57  * E.g. to know if we are using SSE, test (value & __FP_SSE) instead
58  * of (value == __FP_SSE).
59  */
60 #define	__FP_SSE	0x100	/* .. plus SSE-capable CPU		*/
61 #define	__FP_AVX	0x200	/* .. plus AVX-capable CPU		*/
62 
63 /*
64  * values that go into fp_save_mech
65  */
66 #define	FP_FNSAVE	1	/* fnsave/frstor instructions		*/
67 #define	FP_FXSAVE	2	/* fxsave/fxrstor instructions		*/
68 #define	FP_XSAVE	3	/* xsave/xrstor instructions		*/
69 
70 /*
71  * masks for 80387 control word
72  */
73 #define	FPIM	0x00000001	/* invalid operation			*/
74 #define	FPDM	0x00000002	/* denormalized operand			*/
75 #define	FPZM	0x00000004	/* zero divide				*/
76 #define	FPOM	0x00000008	/* overflow				*/
77 #define	FPUM	0x00000010	/* underflow				*/
78 #define	FPPM	0x00000020	/* precision				*/
79 #define	FPPC	0x00000300	/* precision control			*/
80 #define	FPRC	0x00000C00	/* rounding control			*/
81 #define	FPIC	0x00001000	/* infinity control			*/
82 #define	WFPDE	0x00000080	/* data chain exception			*/
83 
84 /*
85  * (Old symbol compatibility)
86  */
87 #define	FPINV	FPIM
88 #define	FPDNO	FPDM
89 #define	FPZDIV	FPZM
90 #define	FPOVR	FPOM
91 #define	FPUNR	FPUM
92 #define	FPPRE	FPPM
93 
94 /*
95  * precision, rounding, and infinity options in control word
96  */
97 #define	FPSIG24 0x00000000	/* 24-bit significand precision (short) */
98 #define	FPSIG53 0x00000200	/* 53-bit significand precision (long)	*/
99 #define	FPSIG64 0x00000300	/* 64-bit significand precision (temp)	*/
100 #define	FPRTN	0x00000000	/* round to nearest or even		*/
101 #define	FPRD	0x00000400	/* round down				*/
102 #define	FPRU	0x00000800	/* round up				*/
103 #define	FPCHOP	0x00000C00	/* chop (truncate toward zero)		*/
104 #define	FPP	0x00000000	/* projective infinity			*/
105 #define	FPA	0x00001000	/* affine infinity			*/
106 #define	WFPB17	0x00020000	/* bit 17				*/
107 #define	WFPB24	0x00040000	/* bit 24; NOTE(review): value is bit 18 */
108 
109 /*
110  * masks for 80387 status word
111  */
112 #define	FPS_IE	0x00000001	/* invalid operation			*/
113 #define	FPS_DE	0x00000002	/* denormalized operand			*/
114 #define	FPS_ZE	0x00000004	/* zero divide				*/
115 #define	FPS_OE	0x00000008	/* overflow				*/
116 #define	FPS_UE	0x00000010	/* underflow				*/
117 #define	FPS_PE	0x00000020	/* precision				*/
118 #define	FPS_SF	0x00000040	/* stack fault				*/
119 #define	FPS_ES	0x00000080	/* error summary bit			*/
120 #define	FPS_C0	0x00000100	/* C0 bit				*/
121 #define	FPS_C1	0x00000200	/* C1 bit				*/
122 #define	FPS_C2	0x00000400	/* C2 bit				*/
123 #define	FPS_TOP	0x00003800	/* top of stack pointer			*/
124 #define	FPS_C3	0x00004000	/* C3 bit				*/
125 #define	FPS_B	0x00008000	/* busy bit				*/
126 
127 /*
128  * Exception flags manually cleared during x87 exception handling.
129  */
130 #define	FPS_SW_EFLAGS	\
131 	(FPS_IE|FPS_DE|FPS_ZE|FPS_OE|FPS_UE|FPS_PE|FPS_SF|FPS_ES|FPS_B)
132 
133 /*
134  * Initial value of FPU control word as per 4th ed. ABI document
135  * - affine infinity
136  * - round to nearest or even
137  * - 64-bit significand (double extended) precision
138  * - all exceptions masked
139  *
140  * The 4th ed. SVR4 ABI didn't discuss the value of reserved bits. The ISA
141  * defines bit 6 (0x40) as reserved, but also that it is set (rather than clear,
142  * like many other Reserved bits). We preserve that in our value here.
143  */
144 #define	FPU_CW_INIT	0x137f
145 
146 /*
147  * This is the Intel mandated form of the default value of the x87 control word.
148  * This is different from what we use and should only be used in the context of
149  * representing that default state (e.g. in /proc xregs).
150  */
151 #define	FPU_CW_INIT_HW	0x037f
152 
153 /*
154  * masks and flags for SSE/SSE2 MXCSR
155  */
156 #define	SSE_IE	0x00000001	/* invalid operation			*/
157 #define	SSE_DE	0x00000002	/* denormalized operand			*/
158 #define	SSE_ZE	0x00000004	/* zero divide				*/
159 #define	SSE_OE	0x00000008	/* overflow				*/
160 #define	SSE_UE	0x00000010	/* underflow				*/
161 #define	SSE_PE	0x00000020	/* precision				*/
162 #define	SSE_DAZ	0x00000040	/* denormals are zero			*/
163 #define	SSE_IM	0x00000080	/* invalid op exception mask		*/
164 #define	SSE_DM	0x00000100	/* denormalize exception mask		*/
165 #define	SSE_ZM	0x00000200	/* zero-divide exception mask		*/
166 #define	SSE_OM	0x00000400	/* overflow exception mask		*/
167 #define	SSE_UM	0x00000800	/* underflow exception mask		*/
168 #define	SSE_PM	0x00001000	/* precision exception mask		*/
169 #define	SSE_RC	0x00006000	/* rounding control			*/
170 #define	SSE_RD	0x00002000	/* rounding control: round down		*/
171 #define	SSE_RU	0x00004000	/* rounding control: round up		*/
172 #define	SSE_FZ	0x00008000	/* flush to zero for masked underflow	*/
173 
174 #define	SSE_MXCSR_EFLAGS	\
175 	(SSE_IE|SSE_DE|SSE_ZE|SSE_OE|SSE_UE|SSE_PE)	/* 0x3f */
176 
177 #define	SSE_MXCSR_INIT	\
178 	(SSE_IM|SSE_DM|SSE_ZM|SSE_OM|SSE_UM|SSE_PM)	/* 0x1f80 */
179 
180 #define	SSE_MXCSR_MASK_DEFAULT	\
181 	(0xffff & ~SSE_DAZ)				/* 0xffbf */
182 
183 #define	SSE_FMT_MXCSR	\
184 	"\20\20fz\17ru\16rd\15pm\14um\13om\12zm\11dm"	\
185 	"\10im\7daz\6pe\5ue\4oe\3ze\2de\1ie"
186 
187 /*
188  * This structure is written to memory by an 'fnsave' instruction
189  */
190 struct fnsave_state {
191 	uint16_t	f_fcw;		/* x87 control word */
192 	uint16_t	__f_ign0;	/* (reserved, ignored) */
193 	uint16_t	f_fsw;		/* x87 status word */
194 	uint16_t	__f_ign1;	/* (reserved, ignored) */
195 	uint16_t	f_ftw;		/* x87 tag word */
196 	uint16_t	__f_ign2;	/* (reserved, ignored) */
197 	uint32_t	f_eip;		/* instruction pointer offset */
198 	uint16_t	f_cs;		/* instruction pointer selector */
199 	uint16_t	f_fop;		/* x87 opcode */
200 	uint32_t	f_dp;		/* data (operand) pointer offset */
201 	uint16_t	f_ds;		/* data (operand) pointer selector */
202 	uint16_t	__f_ign3;	/* (reserved, ignored) */
203 	union {
204 		uint16_t fpr_16[5];	/* 80-bits of x87 state */
205 	} f_st[8];			/* 8 registers, 10 bytes each */
206 };	/* 108 bytes */
207 
208 /*
209  * This structure is written to memory by an 'fxsave' instruction
210  * Note the variant behaviour of this instruction between long mode
211  * and legacy environments!
212  */
213 struct fxsave_state {
214 	uint16_t	fx_fcw;		/* x87 control word */
215 	uint16_t	fx_fsw;		/* x87 status word */
216 	uint16_t	fx_fctw;	/* compressed tag word */
217 	uint16_t	fx_fop;		/* x87 opcode */
218 #if defined(__amd64)
219 	uint64_t	fx_rip;		/* 64-bit instruction pointer */
220 	uint64_t	fx_rdp;		/* 64-bit data (operand) pointer */
221 #else
222 	uint32_t	fx_eip;		/* instruction pointer offset */
223 	uint16_t	fx_cs;		/* instruction pointer selector */
224 	uint16_t	__fx_ign0;	/* (reserved, ignored) */
225 	uint32_t	fx_dp;		/* data (operand) pointer offset */
226 	uint16_t	fx_ds;		/* data (operand) pointer selector */
227 	uint16_t	__fx_ign1;	/* (reserved, ignored) */
228 #endif
229 	uint32_t	fx_mxcsr;	/* SSE control/status (SSE_* bits) */
230 	uint32_t	fx_mxcsr_mask;	/* mask of valid bits for fx_mxcsr */
231 	union {
232 		uint16_t fpr_16[5];	/* 80-bits of x87 state */
233 		u_longlong_t fpr_mmx;	/* 64-bit mmx register */
234 		uint32_t __fpr_pad[4];	/* (pad out to 128-bits) */
235 	} fx_st[8];			/* 8 slots, 16 bytes each */
236 #if defined(__amd64)
237 	upad128_t	fx_xmm[16];	/* 128-bit registers */
238 	upad128_t	__fx_ign2[6];	/* (pads the area to 512 bytes) */
239 #else
240 	upad128_t	fx_xmm[8];	/* 128-bit registers */
241 	upad128_t	__fx_ign2[14];	/* (pads the area to 512 bytes) */
242 #endif
243 } __aligned(16);	/* 512 bytes */
244 
245 /*
246  * This structure represents the header portion of the data layout used by the
247  * 'xsave' instruction variants.  It is documented in section 13.4.2 of the
248  * Intel 64 and IA-32 Architectures Software Developer’s Manual, Volume 1
249  * (IASDv1).  Although "header" is somewhat of a misnomer, considering the data
250  * begins at offset 512 of the xsave area, its contents dictate which portions
251  * of the area are present and how they may be formatted.
252  */
253 struct xsave_header {
254 	uint64_t	xsh_xstate_bv;	/* bitmap of components in use */
255 	uint64_t	xsh_xcomp_bv;	/* compaction bitmap; 0 without xsavec */
256 	uint64_t	xsh_reserved[6];	/* (reserved) */
257 };	/* 64 bytes */
258 
259 /*
260  * This structure is written to memory by one of the 'xsave' instruction
261  * variants. The first 512 bytes are compatible with the format of the 'fxsave'
262  * area.  The extended portion is documented in section 13.4.3.
263  *
264  * Our size is at least AVX_XSAVE_SIZE (832 bytes), which is asserted
265  * statically.  Enabling additional xsave-related CPU features requires an
266  * increase in the size. We dynamically allocate the per-lwp xsave area at
267  * runtime, based on the size needed for the CPU-specific features. This
268  * xsave_state structure simply defines our historical layout for the beginning
269  * of the xsave area. The locations and size of new, extended, components is
270  * determined dynamically by querying the CPU. See the xsave_info structure in
271  * cpuid.c.
272  *
273  * xsave component usage is tracked using bits in the xstate_bv field of the
274  * header. The components are documented in section 13.1 of IASDv1. For easy
275  * reference, this is a summary of the currently defined component bit
276  * definitions:
277  *	x87			0x0001
278  *	SSE			0x0002
279  *	AVX			0x0004
280  *	bndreg (MPX)		0x0008
281  *	bndcsr (MPX)		0x0010
282  *	opmask (AVX512)		0x0020
283  *	zmm hi256 (AVX512)	0x0040
284  *	zmm hi16 (AVX512)	0x0080
285  *	PT			0x0100
286  *	PKRU			0x0200
287  * When xsaveopt_ctxt is being used to save into the xsave_state area, the
288  * xstate_bv field is updated by the xsaveopt instruction to indicate which
289  * elements of the xsave area are active.
290  *
291  * The xcomp_bv field should always be 0, since we do not currently use the
292  * compressed form of xsave (xsavec).
293  */
294 struct xsave_state {
295 	struct fxsave_state	xs_fxsave;	/* 0-511 legacy region */
296 	struct xsave_header	xs_header;	/* 512-575 XSAVE header */
297 	upad128_t		xs_ymm[16];	/* 576-831 AVX component */
298 } __aligned(64);	/* 832 bytes */
299 
300 /*
301  * While AVX_XSAVE_SIZE is the smallest the kernel will allocate for FPU
302  * state-saving, other consumers may constrain themselves to the minimum
303  * possible xsave state structure, which features only the legacy area and the
304  * bare xsave header.
305  */
306 #define	MIN_XSAVE_SIZE	(sizeof (struct fxsave_state) + \
307 			    sizeof (struct xsave_header))
308 
309 /*
310  * Kernel's FPU save area
311  */
312 typedef struct {
313 	union _kfpu_u {			/* one pointer, three typed views */
314 		void *kfpu_generic;	/* untyped view of the save area */
315 		struct fxsave_state *kfpu_fx;	/* viewed as fxsave layout */
316 		struct xsave_state *kfpu_xs;	/* viewed as xsave layout */
317 	} kfpu_u;
318 	uint32_t kfpu_status;		/* saved at #mf exception */
319 	uint32_t kfpu_xstatus;		/* saved at #xm exception */
320 } kfpu_t;
321 
322 extern int fp_kind;		/* kind of fp support */
323 extern int fp_save_mech;	/* fp save/restore mechanism */
324 extern int fpu_exists;		/* FPU hw exists */
325 extern int fp_elf;		/* FP elf type */
326 extern uint64_t xsave_bv_all;	/* Set of enabled xcr0 values */
327 
328 #ifdef _KERNEL
329 
330 extern int fpu_ignored;
331 extern int fpu_pentium_fdivbug;
332 
333 extern uint32_t sse_mxcsr_mask;
334 
335 extern void fpu_probe(void);
336 extern uint_t fpu_initial_probe(void);
337 extern void fpu_save_cache_init(void);
338 
339 extern void fpu_auxv_info(int *, size_t *);
340 extern boolean_t fpu_xsave_enabled(void);
341 
342 extern void fpnsave_ctxt(void *);
343 extern void fpxsave_ctxt(void *);
344 extern void xsave_ctxt(void *);
345 extern void xsaveopt_ctxt(void *);
346 extern void fpxsave_excp_clr_ctxt(void *);
347 extern void xsave_excp_clr_ctxt(void *);
348 extern void xsaveopt_excp_clr_ctxt(void *);
349 extern void (*fpsave_ctxt)(void *);	/* fn ptr; presumably one of the *_ctxt savers above -- confirm */
350 extern void (*xsavep)(struct xsave_state *, uint64_t);	/* fn ptr; same signature as xsave()/xsaveopt() */
351 
352 extern void fpxrestore_ctxt(void *);
353 extern void xrestore_ctxt(void *);
354 extern void (*fprestore_ctxt)(void *);
355 
356 extern void fxsave_insn(struct fxsave_state *);
357 extern void fpxsave(struct fxsave_state *);
358 extern void fpxrestore(struct fxsave_state *);
359 extern void xsave(struct xsave_state *, uint64_t);
360 extern void xsaveopt(struct xsave_state *, uint64_t);
361 extern void xrestore(struct xsave_state *, uint64_t);
362 
363 extern void fpenable(void);
364 extern void fpdisable(void);
365 extern void fpinit(void);
366 
367 extern uint32_t fperr_reset(void);
368 extern uint32_t fpxerr_reset(void);
369 
370 extern uint32_t fpgetcwsw(void);
371 extern uint32_t fpgetmxcsr(void);
372 
373 struct regs;	/* forward declaration; only used by pointer here */
374 extern int fpexterrflt(struct regs *);
375 extern int fpsimderrflt(struct regs *);
376 extern void fpsetcw(uint16_t, uint32_t);
377 extern void fp_seed(void);
378 extern void fp_exec(void);
379 struct _klwp;	/* forward declaration; only used by pointer here */
380 extern void fp_lwp_init(struct _klwp *);
381 extern void fp_lwp_cleanup(struct _klwp *);
382 extern void fp_lwp_dup(struct _klwp *);
383 
384 extern const struct fxsave_state sse_initial;
385 extern const struct xsave_state avx_initial;
386 
387 struct proc;	/* forward declaration; only used by pointer here */
388 struct ucontext;	/* forward declaration; only used by pointer here */
389 extern void fpu_proc_xregs_info(struct proc *, uint32_t *, uint32_t *,
390     uint32_t *);
391 extern size_t fpu_proc_xregs_max_size(void);
392 extern void fpu_proc_xregs_get(struct _klwp *, void *);
393 extern int fpu_proc_xregs_set(struct _klwp *, void *);
394 extern int fpu_signal_copyin(struct _klwp *, struct ucontext *);
395 typedef int (*fpu_copyout_f)(const void *, void *, size_t);	/* presumably (src, dst, len) -- confirm */
396 extern int fpu_signal_copyout(struct _klwp *, uintptr_t, fpu_copyout_f);
397 extern void fpu_set_xsave(struct _klwp *, const void *);
398 extern size_t fpu_signal_size(struct _klwp *);
399 
400 extern void fpu_get_fpregset(struct _klwp *, fpregset_t *);
401 extern void fpu_set_fpregset(struct _klwp *, const fpregset_t *);
402 
403 #endif	/* _KERNEL */
404 
405 #ifdef __cplusplus
406 }
407 #endif
408 
409 #endif	/* _SYS_FP_H */
410