1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 23 * Copyright (c) 2018, Joyent, Inc. 24 * Copyright 2023 Oxide Computer Company 25 * 26 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved. 27 */ 28 29 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ 30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ 31 /* All Rights Reserved */ 32 33 #ifndef _SYS_FP_H 34 #define _SYS_FP_H 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 /* 41 * 80287/80387 and SSE/SSE2 floating point processor definitions 42 */ 43 44 /* 45 * values that go into fp_kind 46 */ 47 #define FP_NO 0 /* no fp chip, no emulator (no fp support) */ 48 #define FP_SW 1 /* no fp chip, using software emulator */ 49 #define FP_HW 2 /* chip present bit */ 50 #define FP_287 2 /* 80287 chip present */ 51 #define FP_387 3 /* 80387 chip present */ 52 #define FP_487 6 /* 80487 chip present */ 53 #define FP_486 6 /* 80486 chip present */ 54 /* 55 * The following values are bit flags instead of actual values. 56 * E.g. to know if we are using SSE, test (value & __FP_SSE) instead 57 * of (value == __FP_SSE). 58 */ 59 #define __FP_SSE 0x100 /* .. plus SSE-capable CPU */ 60 #define __FP_AVX 0x200 /* .. plus AVX-capable CPU */ 61 62 /* 63 * values that go into fp_save_mech 64 */ 65 #define FP_FNSAVE 1 /* fnsave/frstor instructions */ 66 #define FP_FXSAVE 2 /* fxsave/fxrstor instructions */ 67 #define FP_XSAVE 3 /* xsave/xrstor instructions */ 68 69 /* 70 * masks for 80387 control word 71 */ 72 #define FPIM 0x00000001 /* invalid operation */ 73 #define FPDM 0x00000002 /* denormalized operand */ 74 #define FPZM 0x00000004 /* zero divide */ 75 #define FPOM 0x00000008 /* overflow */ 76 #define FPUM 0x00000010 /* underflow */ 77 #define FPPM 0x00000020 /* precision */ 78 #define FPPC 0x00000300 /* precision control */ 79 #define FPRC 0x00000C00 /* rounding control */ 80 #define FPIC 0x00001000 /* infinity control */ 81 #define WFPDE 0x00000080 /* data chain exception */ 82 83 /* 84 * (Old symbol compatibility) 85 */ 86 #define FPINV FPIM 87 #define FPDNO FPDM 88 #define FPZDIV FPZM 89 #define FPOVR FPOM 90 #define FPUNR FPUM 91 #define FPPRE FPPM 92 93 /* 94 * precision, rounding, and infinity options in control word 95 */ 96 #define FPSIG24 0x00000000 /* 24-bit significand precision (short) */ 97 #define FPSIG53 0x00000200 /* 53-bit significand precision (long) */ 98 #define FPSIG64 0x00000300 /* 64-bit significand precision (temp) */ 99 #define FPRTN 0x00000000 /* round to nearest or even */ 100 #define FPRD 0x00000400 /* round down */ 101 #define FPRU 0x00000800 /* round up */ 102 #define FPCHOP 0x00000C00 /* chop (truncate toward zero) */ 103 #define FPP 0x00000000 /* projective infinity */ 104 #define FPA 0x00001000 /* affine infinity */ 105 #define WFPB17 0x00020000 /* bit 17 */ 106 #define WFPB24 0x00040000 /* bit 24 */ 107 108 /* 109 * masks for 80387 status word 110 */ 111 #define FPS_IE 0x00000001 /* invalid operation */ 112 #define FPS_DE 0x00000002 /* denormalized operand */ 113 #define FPS_ZE 0x00000004 /* zero divide */ 114 #define FPS_OE 0x00000008 /* overflow */ 115 #define FPS_UE 0x00000010 /* underflow */ 116 #define FPS_PE 0x00000020 /* precision */ 117 #define FPS_SF 0x00000040 /* stack fault */ 118 #define FPS_ES 0x00000080 /* error summary bit */ 119 #define FPS_C0 0x00000100 /* C0 bit */ 120 #define FPS_C1 0x00000200 /* C1 bit */ 121 #define FPS_C2 0x00000400 /* C2 bit */ 122 #define FPS_TOP 0x00003800 /* top of stack pointer */ 123 #define FPS_C3 0x00004000 /* C3 bit */ 124 #define FPS_B 0x00008000 /* busy bit */ 125 126 /* 127 * Exception flags manually cleared during x87 exception handling. 128 */ 129 #define FPS_SW_EFLAGS \ 130 (FPS_IE|FPS_DE|FPS_ZE|FPS_OE|FPS_UE|FPS_PE|FPS_SF|FPS_ES|FPS_B) 131 132 /* 133 * Initial value of FPU control word as per 4th ed. ABI document 134 * - affine infinity 135 * - round to nearest or even 136 * - 64-bit double precision 137 * - all exceptions masked 138 * 139 * The 4th ed. SVR4 ABI didn't discuss the value of reserved bits. The ISA 140 * defines bit 6 (0x40) as reserved, but also that it is set (rather than clear, 141 * like many other Reserved bits). We preserve that in our value here. 142 */ 143 #define FPU_CW_INIT 0x137f 144 145 /* 146 * This is the Intel mandated form of the default value of the x87 control word. 147 * This is different from what we use and should only be used in the context of 148 * representing that default state (e.g. in /proc xregs). 149 */ 150 #define FPU_CW_INIT_HW 0x037f 151 152 /* 153 * masks and flags for SSE/SSE2 MXCSR 154 */ 155 #define SSE_IE 0x00000001 /* invalid operation */ 156 #define SSE_DE 0x00000002 /* denormalized operand */ 157 #define SSE_ZE 0x00000004 /* zero divide */ 158 #define SSE_OE 0x00000008 /* overflow */ 159 #define SSE_UE 0x00000010 /* underflow */ 160 #define SSE_PE 0x00000020 /* precision */ 161 #define SSE_DAZ 0x00000040 /* denormals are zero */ 162 #define SSE_IM 0x00000080 /* invalid op exception mask */ 163 #define SSE_DM 0x00000100 /* denormalize exception mask */ 164 #define SSE_ZM 0x00000200 /* zero-divide exception mask */ 165 #define SSE_OM 0x00000400 /* overflow exception mask */ 166 #define SSE_UM 0x00000800 /* underflow exception mask */ 167 #define SSE_PM 0x00001000 /* precision exception mask */ 168 #define SSE_RC 0x00006000 /* rounding control */ 169 #define SSE_RD 0x00002000 /* rounding control: round down */ 170 #define SSE_RU 0x00004000 /* rounding control: round up */ 171 #define SSE_FZ 0x00008000 /* flush to zero for masked underflow */ 172 173 #define SSE_MXCSR_EFLAGS \ 174 (SSE_IE|SSE_DE|SSE_ZE|SSE_OE|SSE_UE|SSE_PE) /* 0x3f */ 175 176 #define SSE_MXCSR_INIT \ 177 (SSE_IM|SSE_DM|SSE_ZM|SSE_OM|SSE_UM|SSE_PM) /* 0x1f80 */ 178 179 #define SSE_MXCSR_MASK_DEFAULT \ 180 (0xffff & ~SSE_DAZ) /* 0xffbf */ 181 182 #define SSE_FMT_MXCSR \ 183 "\20\20fz\17ru\16rd\15pm\14um\13om\12zm\11dm" \ 184 "\10im\7daz\6pe\5ue\4oe\3ze\2de\1ie" 185 186 /* 187 * This structure is written to memory by an 'fnsave' instruction 188 */ 189 struct fnsave_state { 190 uint16_t f_fcw; 191 uint16_t __f_ign0; 192 uint16_t f_fsw; 193 uint16_t __f_ign1; 194 uint16_t f_ftw; 195 uint16_t __f_ign2; 196 uint32_t f_eip; 197 uint16_t f_cs; 198 uint16_t f_fop; 199 uint32_t f_dp; 200 uint16_t f_ds; 201 uint16_t __f_ign3; 202 union { 203 uint16_t fpr_16[5]; /* 80-bits of x87 state */ 204 } f_st[8]; 205 }; /* 108 bytes */ 206 207 /* 208 * This structure is written to memory by an 'fxsave' instruction 209 * Note the variant behaviour of this instruction between long mode 210 * and legacy environments! 211 */ 212 struct fxsave_state { 213 uint16_t fx_fcw; 214 uint16_t fx_fsw; 215 uint16_t fx_fctw; /* compressed tag word */ 216 uint16_t fx_fop; 217 #if defined(__amd64) 218 uint64_t fx_rip; 219 uint64_t fx_rdp; 220 #else 221 uint32_t fx_eip; 222 uint16_t fx_cs; 223 uint16_t __fx_ign0; 224 uint32_t fx_dp; 225 uint16_t fx_ds; 226 uint16_t __fx_ign1; 227 #endif 228 uint32_t fx_mxcsr; 229 uint32_t fx_mxcsr_mask; 230 union { 231 uint16_t fpr_16[5]; /* 80-bits of x87 state */ 232 u_longlong_t fpr_mmx; /* 64-bit mmx register */ 233 uint32_t __fpr_pad[4]; /* (pad out to 128-bits) */ 234 } fx_st[8]; 235 #if defined(__amd64) 236 upad128_t fx_xmm[16]; /* 128-bit registers */ 237 upad128_t __fx_ign2[6]; 238 #else 239 upad128_t fx_xmm[8]; /* 128-bit registers */ 240 upad128_t __fx_ign2[14]; 241 #endif 242 } __aligned(16); /* 512 bytes */ 243 244 /* 245 * This structure represents the header portion of the data layout used by the 246 * 'xsave' instruction variants. It is documented in section 13.4.2 of the 247 * Intel 64 and IA-32 Architectures Software Developer’s Manual, Volume 1 248 * (IASDv1). Although "header" is somewhat of a misnomer, considering the data 249 * begins at offset 512 of the xsave area, its contents dictate which portions 250 * of the area are present and how they may be formatted. 251 */ 252 struct xsave_header { 253 uint64_t xsh_xstate_bv; 254 uint64_t xsh_xcomp_bv; 255 uint64_t xsh_reserved[6]; 256 }; 257 258 /* 259 * This structure is written to memory by one of the 'xsave' instruction 260 * variants. The first 512 bytes are compatible with the format of the 'fxsave' 261 * area. The extended portion is documented in section 13.4.3. 262 * 263 * Our size is at least AVX_XSAVE_SIZE (832 bytes), which is asserted 264 * statically. Enabling additional xsave-related CPU features requires an 265 * increase in the size. We dynamically allocate the per-lwp xsave area at 266 * runtime, based on the size needed for the CPU-specific features. This 267 * xsave_state structure simply defines our historical layout for the beginning 268 * of the xsave area. The locations and size of new, extended, components is 269 * determined dynamically by querying the CPU. See the xsave_info structure in 270 * cpuid.c. 271 * 272 * xsave component usage is tracked using bits in the xstate_bv field of the 273 * header. The components are documented in section 13.1 of IASDv1. For easy 274 * reference, this is a summary of the currently defined component bit 275 * definitions: 276 * x87 0x0001 277 * SSE 0x0002 278 * AVX 0x0004 279 * bndreg (MPX) 0x0008 280 * bndcsr (MPX) 0x0010 281 * opmask (AVX512) 0x0020 282 * zmm hi256 (AVX512) 0x0040 283 * zmm hi16 (AVX512) 0x0080 284 * PT 0x0100 285 * PKRU 0x0200 286 * When xsaveopt_ctxt is being used to save into the xsave_state area, the 287 * xstate_bv field is updated by the xsaveopt instruction to indicate which 288 * elements of the xsave area are active. 289 * 290 * The xcomp_bv field should always be 0, since we do not currently use the 291 * compressed form of xsave (xsavec). 292 */ 293 struct xsave_state { 294 struct fxsave_state xs_fxsave; /* 0-511 legacy region */ 295 struct xsave_header xs_header; /* 512-575 XSAVE header */ 296 upad128_t xs_ymm[16]; /* 576 AVX component */ 297 } __aligned(64); 298 299 /* 300 * While AVX_XSTATE_SIZE is the smallest the kernel will allocate for FPU 301 * state-saving, other consumers may constrain themselves to the minimum 302 * possible xsave state structure, which features only the legacy area and the 303 * bare xsave header. 304 */ 305 #define MIN_XSAVE_SIZE (sizeof (struct fxsave_state) + \ 306 sizeof (struct xsave_header)) 307 308 /* 309 * Kernel's FPU save area 310 */ 311 typedef struct { 312 union _kfpu_u { 313 void *kfpu_generic; 314 struct fxsave_state *kfpu_fx; 315 struct xsave_state *kfpu_xs; 316 } kfpu_u; 317 uint32_t kfpu_status; /* saved at #mf exception */ 318 uint32_t kfpu_xstatus; /* saved at #xm exception */ 319 } kfpu_t; 320 321 extern int fp_kind; /* kind of fp support */ 322 extern int fp_save_mech; /* fp save/restore mechanism */ 323 extern int fpu_exists; /* FPU hw exists */ 324 extern int fp_elf; /* FP elf type */ 325 extern uint64_t xsave_bv_all; /* Set of enabed xcr0 values */ 326 327 #ifdef _KERNEL 328 329 extern int fpu_ignored; 330 extern int fpu_pentium_fdivbug; 331 332 extern uint32_t sse_mxcsr_mask; 333 334 extern void fpu_probe(void); 335 extern uint_t fpu_initial_probe(void); 336 337 extern void fpu_auxv_info(int *, size_t *); 338 extern boolean_t fpu_xsave_enabled(void); 339 340 extern void fpnsave_ctxt(void *); 341 extern void fpxsave_ctxt(void *); 342 extern void xsave_ctxt(void *); 343 extern void xsaveopt_ctxt(void *); 344 extern void fpxsave_excp_clr_ctxt(void *); 345 extern void xsave_excp_clr_ctxt(void *); 346 extern void xsaveopt_excp_clr_ctxt(void *); 347 extern void (*fpsave_ctxt)(void *); 348 extern void (*xsavep)(struct xsave_state *, uint64_t); 349 350 extern void fpxrestore_ctxt(void *); 351 extern void xrestore_ctxt(void *); 352 extern void (*fprestore_ctxt)(void *); 353 354 extern void fxsave_insn(struct fxsave_state *); 355 extern void fpxsave(struct fxsave_state *); 356 extern void fpxrestore(struct fxsave_state *); 357 extern void xsave(struct xsave_state *, uint64_t); 358 extern void xsaveopt(struct xsave_state *, uint64_t); 359 extern void xrestore(struct xsave_state *, uint64_t); 360 361 extern void fpenable(void); 362 extern void fpdisable(void); 363 extern void fpinit(void); 364 365 extern uint32_t fperr_reset(void); 366 extern uint32_t fpxerr_reset(void); 367 368 extern uint32_t fpgetcwsw(void); 369 extern uint32_t fpgetmxcsr(void); 370 371 struct regs; 372 extern int fpexterrflt(struct regs *); 373 extern int fpsimderrflt(struct regs *); 374 extern void fpsetcw(uint16_t, uint32_t); 375 extern void fp_seed(void); 376 extern void fp_exec(void); 377 struct _klwp; 378 extern void fp_lwp_init(struct _klwp *); 379 extern void fp_lwp_cleanup(struct _klwp *); 380 extern void fp_lwp_dup(struct _klwp *); 381 382 extern const struct fxsave_state sse_initial; 383 extern const struct xsave_state avx_initial; 384 385 struct proc; 386 struct ucontext; 387 extern void fpu_proc_xregs_info(struct proc *, uint32_t *, uint32_t *, 388 uint32_t *); 389 extern size_t fpu_proc_xregs_max_size(void); 390 extern void fpu_proc_xregs_get(struct _klwp *, void *); 391 extern int fpu_proc_xregs_set(struct _klwp *, void *); 392 extern int fpu_signal_copyin(struct _klwp *, struct ucontext *); 393 typedef int (*fpu_copyout_f)(const void *, void *, size_t); 394 extern int fpu_signal_copyout(struct _klwp *, uintptr_t, fpu_copyout_f); 395 extern void fpu_set_xsave(struct _klwp *, const void *); 396 extern size_t fpu_signal_size(struct _klwp *); 397 398 extern void fpu_get_fpregset(struct _klwp *, fpregset_t *); 399 extern void fpu_set_fpregset(struct _klwp *, const fpregset_t *); 400 401 #endif /* _KERNEL */ 402 403 #ifdef __cplusplus 404 } 405 #endif 406 407 #endif /* _SYS_FP_H */ 408