// SPDX-License-Identifier: GPL-2.0-only
/*
 * xsave/xrstor support.
 *
 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
 */
#include <linux/bitops.h>
#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/kvm_types.h>
#include <linux/nospec.h>
#include <linux/pkeys.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/coredump.h>
#include <linux/sort.h>

#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/xcr.h>

#include <asm/cpuid/api.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/prctl.h>
#include <asm/elf.h>

#include <uapi/asm/elf.h>

#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"

#define for_each_extended_xfeature(bit, mask)				\
	(bit) = FIRST_EXTENDED_XFEATURE;				\
	for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))

/*
 * Although we spell it out in here, the Processor Trace
 * xfeature is completely unused. We use other mechanisms
 * to save/restore PT state in Linux.
 */
static const char *xfeature_names[] =
{
	"x87 floating point registers",
	"SSE registers",
	"AVX registers",
	"MPX bounds registers",
	"MPX CSR",
	"AVX-512 opmask",
	"AVX-512 Hi256",
	"AVX-512 ZMM_Hi256",
	"Processor Trace (unused)",
	"Protection Keys User registers",
	"PASID state",
	"Control-flow User registers",
	"Control-flow Kernel registers (KVM only)",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"AMX Tile config",
	"AMX Tile data",
	"APX registers",
	"unknown xstate feature",
};

static unsigned short xsave_cpuid_features[] __initdata = {
	[XFEATURE_FP]				= X86_FEATURE_FPU,
	[XFEATURE_SSE]				= X86_FEATURE_XMM,
	[XFEATURE_YMM]				= X86_FEATURE_AVX,
	[XFEATURE_BNDREGS]			= X86_FEATURE_MPX,
	[XFEATURE_BNDCSR]			= X86_FEATURE_MPX,
	[XFEATURE_OPMASK]			= X86_FEATURE_AVX512F,
	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
	[XFEATURE_PKRU]				= X86_FEATURE_OSPKE,
	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
	[XFEATURE_CET_USER]			= X86_FEATURE_SHSTK,
	[XFEATURE_CET_KERNEL]			= X86_FEATURE_SHSTK,
	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
	[XFEATURE_APX]				= X86_FEATURE_APX,
};

static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;

/*
 * Ordering of xstate components in uncompacted format: The xfeature
 * number does not necessarily indicate its position in the XSAVE buffer.
 * This array defines the traversal order of xstate features.
 */
static unsigned int xfeature_uncompact_order[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};

static inline unsigned int next_xfeature_order(unsigned int i, u64 mask)
{
	for (; xfeature_uncompact_order[i] != -1; i++) {
		if (mask & BIT_ULL(xfeature_uncompact_order[i]))
			break;
	}

	return i;
}

/* Iterate xstate features in uncompacted order: */
#define for_each_extended_xfeature_in_order(i, mask)	\
	for (i = 0;					\
	     i = next_xfeature_order(i, mask),		\
	     xfeature_uncompact_order[i] != -1;		\
	     i++)

#define XSTATE_FLAG_SUPERVISOR	BIT(0)
#define XSTATE_FLAG_ALIGNED64	BIT(1)

/*
 * Return whether the system supports a given xfeature.
 *
 * Also return the name of the (most advanced) feature that the caller requested:
 */
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
	u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;

	if (unlikely(feature_name)) {
		long xfeature_idx, max_idx;
		u64 xfeatures_print;
		/*
		 * We use fls64() here to be able to print the most advanced
		 * feature that was requested but is missing. So if a driver
		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
		 * missing AVX feature - this is the most informative message
		 * to users:
		 */
		if (xfeatures_missing)
			xfeatures_print = xfeatures_missing;
		else
			xfeatures_print = xfeatures_needed;

		xfeature_idx = fls64(xfeatures_print)-1;
		max_idx = ARRAY_SIZE(xfeature_names)-1;
		xfeature_idx = min(xfeature_idx, max_idx);

		*feature_name = xfeature_names[xfeature_idx];
	}

	if (xfeatures_missing)
		return 0;

	return 1;
}
EXPORT_SYMBOL_GPL(cpu_has_xfeatures);

static bool xfeature_is_aligned64(int xfeature_nr)
{
	return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
}

static bool xfeature_is_supervisor(int xfeature_nr)
{
	return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
}

static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
{
	unsigned int offs, i;

	/*
	 * Non-compacted format and legacy features use the cached fixed
	 * offsets.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
	    xfeature <= XFEATURE_SSE)
		return xstate_offsets[xfeature];

	/*
	 * Compacted format offsets depend on the actual content of the
	 * compacted xsave area which is determined by the xcomp_bv header
	 * field.
	 */
	offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	for_each_extended_xfeature(i, xcomp_bv) {
		if (xfeature_is_aligned64(i))
			offs = ALIGN(offs, 64);
		if (i == xfeature)
			break;
		offs += xstate_sizes[i];
	}
	return offs;
}

/*
 * Enable the extended processor state save/restore feature.
 * Called once per CPU onlining.
 */
void fpu__init_cpu_xstate(void)
{
	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
		return;

	cr4_set_bits(X86_CR4_OSXSAVE);

	/*
	 * Must happen after CR4 setup and before xsetbv() to allow KVM
	 * lazy passthrough. Write independent of the dynamic state static
	 * key as that does not work on the boot CPU. This also ensures
	 * that any stale state is wiped out from XFD. Reset the per CPU
	 * xfd cache too.
	 */
	if (cpu_feature_enabled(X86_FEATURE_XFD))
		xfd_set_state(init_fpstate.xfd);

	/*
	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
	 * managed by XSAVE{C, OPT, S} and XRSTOR{S}.
	 * Only XSAVE user states can be set here.
	 */
	xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);

	/*
	 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
	 */
	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
		wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() |
				     xfeatures_mask_independent());
	}
}

static bool xfeature_enabled(enum xfeature xfeature)
{
	return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
}

static int compare_xstate_offsets(const void *xfeature1, const void *xfeature2)
{
	return xstate_offsets[*(unsigned int *)xfeature1] -
	       xstate_offsets[*(unsigned int *)xfeature2];
}

/*
 * Record the offsets and sizes of various xstates contained
 * in the XSAVE state memory layout. Also, create an ordered
 * list of xfeatures for handling out-of-order offsets.
 */
static void __init setup_xstate_cache(void)
{
	u32 eax, ebx, ecx, edx, xfeature, i = 0;
	/*
	 * The FP xstates and SSE xstates are legacy states. They are always
	 * at fixed offsets in the xsave area, in either compacted form or
	 * standard form.
	 */
	xstate_offsets[XFEATURE_FP]	= 0;
	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
						   xmm_space);

	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
						       xmm_space);

	for_each_extended_xfeature(xfeature, fpu_kernel_cfg.max_features) {
		cpuid_count(CPUID_LEAF_XSTATE, xfeature, &eax, &ebx, &ecx, &edx);

		xstate_sizes[xfeature] = eax;
		xstate_flags[xfeature] = ecx;

		/*
		 * If an xfeature is supervisor state, the offset in EBX is
		 * invalid; leave it at -1.
		 */
		if (xfeature_is_supervisor(xfeature))
			continue;

		xstate_offsets[xfeature] = ebx;

		/* Populate the list of xfeatures before sorting */
		xfeature_uncompact_order[i++] = xfeature;
	}

	/*
	 * Sort xfeatures by their offsets to support out-of-order
	 * offsets in the uncompacted format.
	 */
	sort(xfeature_uncompact_order, i, sizeof(unsigned int), compare_xstate_offsets, NULL);
}

/*
 * Print out all the supported xstate features:
 */
static void __init print_xstate_features(void)
{
	int i;

	for (i = 0; i < XFEATURE_MAX; i++) {
		u64 mask = BIT_ULL(i);
		const char *name;

		if (cpu_has_xfeatures(mask, &name))
			pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", mask, name);
	}
}

/*
 * This check is important because it is easy to get XSTATE_*
 * confused with XSTATE_BIT_*.
 */
#define CHECK_XFEATURE(nr) do {			\
	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
	WARN_ON(nr >= XFEATURE_MAX);		\
} while (0)

/*
 * Print out xstate component offsets and sizes
 */
static void __init print_xstate_offset_size(void)
{
	int i;

	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
			i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
			i, xstate_sizes[i]);
	}
}

/*
 * This function is called only during boot time when x86 caps are not set
 * up and alternatives can not be used yet.
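 *
 * Hence the raw XSTATE_OP() macro is used below with an explicit XSAVES
 * check instead of an ALTERNATIVE-patched helper.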
 */
static __init void os_xrstor_booting(struct xregs_state *xstate)
{
	u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
	u32 lmask = mask;
	u32 hmask = mask >> 32;
	int err;

	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
	else
		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);

	/*
	 * We should never fault when copying from a kernel buffer, and the FPU
	 * state we set at boot time should be valid.
	 */
	WARN_ON_FPU(err);
}

/*
 * All supported features have either init state all zeros or are
 * handled in setup_init_fpu_buf() individually. This is an explicit
 * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
 * newly added supported features at build time and make people
 * actually look at the init state for the new feature.
 */
#define XFEATURES_INIT_FPSTATE_HANDLED		\
	(XFEATURE_MASK_FP |			\
	 XFEATURE_MASK_SSE |			\
	 XFEATURE_MASK_YMM |			\
	 XFEATURE_MASK_OPMASK |			\
	 XFEATURE_MASK_ZMM_Hi256 |		\
	 XFEATURE_MASK_Hi16_ZMM |		\
	 XFEATURE_MASK_PKRU |			\
	 XFEATURE_MASK_BNDREGS |		\
	 XFEATURE_MASK_BNDCSR |			\
	 XFEATURE_MASK_PASID |			\
	 XFEATURE_MASK_CET_USER |		\
	 XFEATURE_MASK_CET_KERNEL |		\
	 XFEATURE_MASK_XTILE |			\
	 XFEATURE_MASK_APX)

/*
 * setup the xstate image representing the init state
 */
static void __init setup_init_fpu_buf(void)
{
	BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
		      XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
		     XFEATURES_INIT_FPSTATE_HANDLED);

	if (!boot_cpu_has(X86_FEATURE_XSAVE))
		return;

	print_xstate_features();

	xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);

	/*
	 * Init all the features' state with header.xfeatures being 0x0
	 */
	os_xrstor_booting(&init_fpstate.regs.xsave);

	/*
	 * All components are now in init state. Read the state back so
	 * that init_fpstate contains all non-zero init state. This only
	 * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
	 * those use the init optimization which skips writing data for
	 * components in init state.
	 *
	 * XSAVE could be used, but that would require to reshuffle the
	 * data when XSAVEC/S is available because XSAVEC/S uses xstate
	 * compaction. But doing so is a pointless exercise because most
	 * components have an all zeros init state except for the legacy
	 * ones (FP and SSE). Those can be saved with FXSAVE into the
	 * legacy area. Adding new features requires ensuring that the
	 * init state is all zeroes or, if not, adding the necessary
	 * handling here.
	 */
	fxsave(&init_fpstate.regs.fxsave);
}

int xfeature_size(int xfeature_nr)
{
	u32 eax, ebx, ecx, edx;

	CHECK_XFEATURE(xfeature_nr);
	cpuid_count(CPUID_LEAF_XSTATE, xfeature_nr, &eax, &ebx, &ecx, &edx);
	return eax;
}

/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
static int validate_user_xstate_header(const struct xstate_header *hdr,
				       struct fpstate *fpstate)
{
	/* No unknown or supervisor features may be set */
	if (hdr->xfeatures & ~fpstate->user_xfeatures)
		return -EINVAL;

	/* Userspace must use the uncompacted format */
	if (hdr->xcomp_bv)
		return -EINVAL;

	/*
	 * If 'reserved' is shrunk to add a new field, make sure to validate
	 * that new field here!
	 */
	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);

	/* No reserved bits may be set */
	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
		return -EINVAL;

	return 0;
}

static void __init __xstate_dump_leaves(void)
{
	int i;
	u32 eax, ebx, ecx, edx;
	static int should_dump = 1;

	if (!should_dump)
		return;
	should_dump = 0;
	/*
	 * Dump out a few leaves past the ones that we support
	 * just in case there are some goodies up there
	 */
	for (i = 0; i < XFEATURE_MAX + 10; i++) {
		cpuid_count(CPUID_LEAF_XSTATE, i, &eax, &ebx, &ecx, &edx);
		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
			CPUID_LEAF_XSTATE, i, eax, ebx, ecx, edx);
	}
}

#define XSTATE_WARN_ON(x, fmt, ...) do {					\
	if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) {	\
		__xstate_dump_leaves();						\
	}									\
} while (0)

#define XCHECK_SZ(sz, nr, __struct) ({					\
	if (WARN_ONCE(sz != sizeof(__struct),				\
		      "[%s]: struct is %zu bytes, cpu state %d bytes\n",\
		      xfeature_names[nr], sizeof(__struct), sz)) {	\
		__xstate_dump_leaves();					\
	}								\
	true;								\
})


/**
 * check_xtile_data_against_struct - Check tile data state size.
 *
 * Calculate the state size by multiplying the single-tile size, which is
 * recorded in a C struct, by the number of tiles that the CPU reports.
 * Compare the provided size with that calculation.
 *
 * @size: The tile data state size
 *
 * Returns: 0 on success, -EINVAL on mismatch.
 */
static int __init check_xtile_data_against_struct(int size)
{
	u32 max_palid, palid, state_size;
	u32 eax, ebx, ecx, edx;
	u16 max_tile;

	/*
	 * Check the maximum palette id:
	 *   eax: the highest numbered palette subleaf.
	 */
	cpuid_count(CPUID_LEAF_TILE, 0, &max_palid, &ebx, &ecx, &edx);

	/*
	 * Cross-check each tile size and find the maximum number of
	 * supported tiles.
	 */
	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
		u16 tile_size, max;

		/*
		 * Check the tile size info:
		 *   eax[31:16]: bytes per tile
		 *   ebx[31:16]: the max names (or max number of tiles)
		 */
		cpuid_count(CPUID_LEAF_TILE, palid, &eax, &ebx, &edx, &edx);
		tile_size = eax >> 16;
		max = ebx >> 16;

		if (tile_size != sizeof(struct xtile_data)) {
			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
			       __stringify(XFEATURE_XTILE_DATA),
			       sizeof(struct xtile_data), tile_size);
			__xstate_dump_leaves();
			return -EINVAL;
		}

		if (max > max_tile)
			max_tile = max;
	}

	state_size = sizeof(struct xtile_data) * max_tile;
	if (size != state_size) {
		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
		__xstate_dump_leaves();
		return -EINVAL;
	}
	return 0;
}

/*
 * We have a C struct for each 'xstate'. We need to ensure
 * that our software representation matches what the CPU
 * tells us about the state's size.
 */
static bool __init check_xstate_against_struct(int nr)
{
	/*
	 * Ask the CPU for the size of the state.
	 */
	int sz = xfeature_size(nr);

	/*
	 * Match each CPU state with the corresponding software
	 * structure.
	 */
	switch (nr) {
	case XFEATURE_YMM:	  return XCHECK_SZ(sz, nr, struct ymmh_struct);
	case XFEATURE_BNDREGS:	  return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
	case XFEATURE_BNDCSR:	  return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
	case XFEATURE_OPMASK:	  return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
	case XFEATURE_ZMM_Hi256:  return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
	case XFEATURE_Hi16_ZMM:	  return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
	case XFEATURE_PKRU:	  return XCHECK_SZ(sz, nr, struct pkru_state);
	case XFEATURE_PASID:	  return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
	case XFEATURE_XTILE_CFG:  return XCHECK_SZ(sz, nr, struct xtile_cfg);
	case XFEATURE_CET_USER:	  return XCHECK_SZ(sz, nr, struct cet_user_state);
	case XFEATURE_CET_KERNEL: return XCHECK_SZ(sz, nr, struct cet_supervisor_state);
	case XFEATURE_APX:	  return XCHECK_SZ(sz, nr, struct apx_state);
	case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
	default:
		XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
		return false;
	}

	return true;
}

static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
{
	unsigned int topmost = fls64(xfeatures) - 1;
	unsigned int offset, i;

	if (topmost <= XFEATURE_SSE)
		return sizeof(struct xregs_state);

	if (compacted) {
		offset = xfeature_get_offset(xfeatures, topmost);
	} else {
		/* Walk through the xfeature order to pick the last */
		for_each_extended_xfeature_in_order(i, xfeatures)
			topmost = xfeature_uncompact_order[i];
		offset = xstate_offsets[topmost];
	}

	return offset + xstate_sizes[topmost];
}

/*
 * This essentially double-checks what the cpu told us about
 * how large the XSAVE buffer needs to be. We are recalculating
 * it to be safe.
 *
 * Independent XSAVE features allocate their own buffers and are not
 * covered by these checks. Only the size of the buffer for task->fpu
 * is checked here.
 */
static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
{
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	int i;

	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
		if (!check_xstate_against_struct(i))
			return false;
		/*
		 * Supervisor state components can be managed only by
		 * XSAVES.
		 */
		if (!xsaves && xfeature_is_supervisor(i)) {
			XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
			return false;
		}
	}
	size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
	XSTATE_WARN_ON(size != kernel_size,
		       "size %u != kernel_size %u\n", size, kernel_size);
	return size == kernel_size;
}

/*
 * Get total size of enabled xstates in XCR0 | IA32_XSS.
 *
 * Note the SDM's wording here. "sub-function 0" only enumerates
 * the size of the *user* states. If we use it to size a buffer
 * that we use 'XSAVES' on, we could potentially overflow the
 * buffer because 'XSAVES' saves system states too.
 *
 * This also takes compaction into account. So this works for
 * XSAVEC as well.
 */
static unsigned int __init get_compacted_size(void)
{
	unsigned int eax, ebx, ecx, edx;
	/*
	 * - CPUID function 0DH, sub-function 1:
	 *    EBX enumerates the size (in bytes) required by
	 *    the XSAVES instruction for an XSAVE area
	 *    containing all the state components
	 *    corresponding to bits currently set in
	 *    XCR0 | IA32_XSS.
	 *
	 * When XSAVES is not available but XSAVEC is (virt), then there
	 * are no supervisor states, but XSAVEC still uses compacted
	 * format.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
	return ebx;
}

/*
 * Get the total size of the enabled xstates without the independent supervisor
 * features.
 */
static unsigned int __init get_xsave_compacted_size(void)
{
	u64 mask = xfeatures_mask_independent();
	unsigned int size;

	if (!mask)
		return get_compacted_size();

	/* Disable independent features. */
	wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor());

	/*
	 * Ask the hardware what size is required of the buffer.
	 * This is the size required for the task->fpu buffer.
	 */
	size = get_compacted_size();

	/* Re-enable independent features so XSAVES will work on them again. */
	wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);

	return size;
}

static unsigned int __init get_xsave_size_user(void)
{
	unsigned int eax, ebx, ecx, edx;
	/*
	 * - CPUID function 0DH, sub-function 0:
	 *    EBX enumerates the size (in bytes) required by
	 *    the XSAVE instruction for an XSAVE area
	 *    containing all the *user* state components
	 *    corresponding to bits currently set in XCR0.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
	return ebx;
}

static int __init init_xstate_size(void)
{
	/* Recompute the context size for enabled features: */
	unsigned int user_size, kernel_size, kernel_default_size;
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);

	/* Uncompacted user space size */
	user_size = get_xsave_size_user();

	/*
	 * XSAVES kernel size includes supervisor states and uses compacted
	 * format. XSAVEC uses compacted format, but does not save
	 * supervisor states.
	 *
	 * XSAVE[OPT] do not support supervisor states so kernel and user
	 * size is identical.
	 */
	if (compacted)
		kernel_size = get_xsave_compacted_size();
	else
		kernel_size = user_size;

	kernel_default_size =
		xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);

	if (!paranoid_xstate_size_valid(kernel_size))
		return -EINVAL;

	fpu_kernel_cfg.max_size = kernel_size;
	fpu_user_cfg.max_size = user_size;

	fpu_kernel_cfg.default_size = kernel_default_size;
	fpu_user_cfg.default_size =
		xstate_calculate_size(fpu_user_cfg.default_features, false);

	guest_default_cfg.size =
		xstate_calculate_size(guest_default_cfg.features, compacted);

	return 0;
}

/*
 * We enabled the XSAVE hardware, but something went wrong and
 * we cannot use it. Disable it.
 */
static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
{
	pr_info("x86/fpu: XSAVE disabled\n");

	fpu_kernel_cfg.max_features = 0;
	cr4_clear_bits(X86_CR4_OSXSAVE);
	setup_clear_cpu_cap(X86_FEATURE_XSAVE);

	/* Restore the legacy size. */
	fpu_kernel_cfg.max_size = legacy_size;
	fpu_kernel_cfg.default_size = legacy_size;
	fpu_user_cfg.max_size = legacy_size;
	fpu_user_cfg.default_size = legacy_size;
	guest_default_cfg.size = legacy_size;

	/*
	 * Prevent enabling the static branch which enables writes to the
	 * XFD MSR.
	 */
	init_fpstate.xfd = 0;

	fpstate_reset(x86_task_fpu(current));
}

static u64 __init host_default_mask(void)
{
	/*
	 * Exclude dynamic features (which require userspace opt-in) and
	 * features that are supported only for KVM guests.
	 */
	return ~((u64)XFEATURE_MASK_USER_DYNAMIC | XFEATURE_MASK_GUEST_SUPERVISOR);
}

static u64 __init guest_default_mask(void)
{
	/*
	 * Exclude dynamic features, which require userspace opt-in even
	 * for KVM guests.
	 */
	return ~(u64)XFEATURE_MASK_USER_DYNAMIC;
}

/*
 * Enable and initialize the xsave feature.
 * Called once per system bootup.
 */
void __init fpu__init_system_xstate(unsigned int legacy_size)
{
	unsigned int eax, ebx, ecx, edx;
	u64 xfeatures;
	int err;
	int i;

	if (!boot_cpu_has(X86_FEATURE_FPU)) {
		pr_info("x86/fpu: No FPU detected\n");
		return;
	}

	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
		pr_info("x86/fpu: x87 FPU will use %s\n",
			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
		return;
	}

	/*
	 * Find user xstates supported by the processor.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);

	/*
	 * Find supervisor xstates supported by the processor.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);

	if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		/*
		 * This indicates that something really unexpected happened
		 * with the enumeration. Disable XSAVE and try to continue
		 * booting without it. This is too early to BUG().
		 */
		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	if (fpu_kernel_cfg.max_features & XFEATURE_MASK_APX &&
	    fpu_kernel_cfg.max_features & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) {
		/*
		 * This is a problematic CPU configuration where two
		 * conflicting state components are both enumerated.
		 */
		pr_err("x86/fpu: Both APX/MPX present in the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
					      XFEATURE_MASK_INDEPENDENT;

	/*
	 * Clear XSAVE features that are disabled in the normal CPUID.
	 */
	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
		unsigned short cid = xsave_cpuid_features[i];

		/*
		 * Careful: X86_FEATURE_FPU is 0!
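		 *
		 * A zero 'cid' would normally mean "no CPUID feature maps to
		 * this xfeature", but X86_FEATURE_FPU itself is bit 0, so
		 * XFEATURE_FP has to be special-cased before !cid is treated
		 * as "not supported".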
		 */
		if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
			fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
	}

	if (!cpu_feature_enabled(X86_FEATURE_XFD))
		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
	else
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
					       XFEATURE_MASK_SUPERVISOR_SUPPORTED;

	fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
	fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;

	/*
	 * Now, given the maximum feature set, determine the default values
	 * by applying the default masks.
	 */
	fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features & host_default_mask();
	fpu_user_cfg.default_features = fpu_user_cfg.max_features & host_default_mask();
	guest_default_cfg.features = fpu_kernel_cfg.max_features & guest_default_mask();

	/* Store it for paranoia check at the end */
	xfeatures = fpu_kernel_cfg.max_features;

	/*
	 * Initialize the default XFD state in init_fpstate and enable the
	 * dynamic sizing mechanism if dynamic states are available. The
	 * static key cannot be enabled here because this runs before
	 * jump_label_init(). This is delayed to an initcall.
	 */
	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;

	/* Set up compaction feature bit */
	if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
	    cpu_feature_enabled(X86_FEATURE_XSAVES))
		setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);

	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();

	/* Cache size, offset and flags for initialization */
	setup_xstate_cache();

	err = init_xstate_size();
	if (err)
		goto out_disable;

	/*
	 * Update info used for ptrace frames; use standard-format size and no
	 * supervisor xstates:
	 */
	update_regset_xstate_info(fpu_user_cfg.max_size,
				  fpu_user_cfg.max_features);

	/*
	 * init_fpstate excludes dynamic states as they are large but their
	 * init state is all zeros.
	 */
	init_fpstate.size = fpu_kernel_cfg.default_size;
	init_fpstate.xfeatures = fpu_kernel_cfg.default_features;

	if (init_fpstate.size > sizeof(init_fpstate.regs)) {
		pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d)\n",
			sizeof(init_fpstate.regs), init_fpstate.size);
		goto out_disable;
	}

	setup_init_fpu_buf();

	/*
	 * Paranoia check whether something in the setup modified the
	 * xfeatures mask.
	 */
	if (xfeatures != fpu_kernel_cfg.max_features) {
		pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init\n",
		       xfeatures, fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/*
	 * CPU capabilities initialization runs before FPU init. So
	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
	 * functional, set the feature bit so depending code works.
	 */
	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);

	print_xstate_offset_size();
	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
		fpu_kernel_cfg.max_features,
		fpu_kernel_cfg.max_size,
		boot_cpu_has(X86_FEATURE_XCOMPACTED) ?
"compacted" : "standard"); 962 return; 963 964 out_disable: 965 /* something went wrong, try to boot without any XSAVE support */ 966 fpu__init_disable_system_xstate(legacy_size); 967 } 968 969 /* 970 * Restore minimal FPU state after suspend: 971 */ 972 void fpu__resume_cpu(void) 973 { 974 /* 975 * Restore XCR0 on xsave capable CPUs: 976 */ 977 if (cpu_feature_enabled(X86_FEATURE_XSAVE)) 978 xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features); 979 980 /* 981 * Restore IA32_XSS. The same CPUID bit enumerates support 982 * of XSAVES and MSR_IA32_XSS. 983 */ 984 if (cpu_feature_enabled(X86_FEATURE_XSAVES)) { 985 wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | 986 xfeatures_mask_independent()); 987 } 988 989 if (fpu_state_size_dynamic()) 990 wrmsrq(MSR_IA32_XFD, x86_task_fpu(current)->fpstate->xfd); 991 } 992 993 /* 994 * Given an xstate feature nr, calculate where in the xsave 995 * buffer the state is. Callers should ensure that the buffer 996 * is valid. 997 */ 998 static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) 999 { 1000 u64 xcomp_bv = xsave->header.xcomp_bv; 1001 1002 if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) 1003 return NULL; 1004 1005 if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) { 1006 if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr)))) 1007 return NULL; 1008 } 1009 1010 return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr); 1011 } 1012 1013 /* 1014 * Given the xsave area and a state inside, this function returns the 1015 * address of the state. 1016 * 1017 * This is the API that is called to get xstate address in either 1018 * standard format or compacted format of xsave area. 1019 * 1020 * Note that if there is no data for the field in the xsave buffer 1021 * this will return NULL. 1022 * 1023 * Inputs: 1024 * xstate: the thread's storage area for all FPU data 1025 * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, 1026 * XFEATURE_SSE, etc...) 1027 * Output: 1028 * address of the state in the xsave area, or NULL if the 1029 * field is not present in the xsave buffer. 1030 */ 1031 void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) 1032 { 1033 /* 1034 * Do we even *have* xsave state? 1035 */ 1036 if (!boot_cpu_has(X86_FEATURE_XSAVE)) 1037 return NULL; 1038 1039 /* 1040 * We should not ever be requesting features that we 1041 * have not enabled. 1042 */ 1043 if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) 1044 return NULL; 1045 1046 /* 1047 * This assumes the last 'xsave*' instruction to 1048 * have requested that 'xfeature_nr' be saved. 1049 * If it did not, we might be seeing and old value 1050 * of the field in the buffer. 1051 * 1052 * This can happen because the last 'xsave' did not 1053 * request that this feature be saved (unlikely) 1054 * or because the "init optimization" caused it 1055 * to not be saved. 1056 */ 1057 if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr))) 1058 return NULL; 1059 1060 return __raw_xsave_addr(xsave, xfeature_nr); 1061 } 1062 EXPORT_SYMBOL_FOR_KVM(get_xsave_addr); 1063 1064 /* 1065 * Given an xstate feature nr, calculate where in the xsave buffer the state is. 1066 * The xsave buffer should be in standard format, not compacted (e.g. user mode 1067 * signal frames). 
 */
void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_nr)
{
	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
		return NULL;

	return (void __user *)xsave + xstate_offsets[xfeature_nr];
}

#ifdef CONFIG_ARCH_HAS_PKEYS

/*
 * This will go out and modify the PKRU register to set the access
 * rights for @pkey to @init_val.
 */
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
			      unsigned long init_val)
{
	u32 old_pkru, new_pkru_bits = 0;
	int pkey_shift;

	/*
	 * This check implies XSAVE support. OSPKE only gets
	 * set if we enable XSAVE and we enable PKU in XCR0.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
		return -EINVAL;

	/*
	 * This code should only be called with valid 'pkey'
	 * values originating from in-kernel users. Complain
	 * if a bad value is observed.
	 */
	if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
		return -EINVAL;

	/* Set the bits we need in PKRU: */
	if (init_val & PKEY_DISABLE_ACCESS)
		new_pkru_bits |= PKRU_AD_BIT;
	if (init_val & PKEY_DISABLE_WRITE)
		new_pkru_bits |= PKRU_WD_BIT;

	/* Shift the bits in to the correct place in PKRU for pkey: */
	pkey_shift = pkey * PKRU_BITS_PER_PKEY;
	new_pkru_bits <<= pkey_shift;

	/* Get old PKRU and mask off any old bits in place: */
	old_pkru = read_pkru();
	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);

	/* Write old part along with new part: */
	write_pkru(old_pkru | new_pkru_bits);

	return 0;
}
#endif /* CONFIG_ARCH_HAS_PKEYS */

static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
			 void *init_xstate, unsigned int size)
{
	membuf_write(to, from_xstate ? xstate : init_xstate, size);
}

/**
 * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @fpstate:	The fpstate buffer from which to copy
 * @xfeatures:	The mask of xfeatures to save (XSAVE mode only)
 * @pkru_val:	The PKRU value to store in the PKRU component
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * It supports partial copy but @to.pos always starts from zero.
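 *
 * Callers include the ptrace regset code via copy_xstate_to_uabi_buf()
 * below and the KVM guest state read-out path, both of which need the
 * uncompacted UABI layout regardless of the kernel internal format.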
 */
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
			       u64 xfeatures, u32 pkru_val,
			       enum xstate_copy_mode copy_mode)
{
	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
	struct xregs_state *xinit = &init_fpstate.regs.xsave;
	struct xregs_state *xsave = &fpstate->regs.xsave;
	unsigned int zerofrom, i, xfeature;
	struct xstate_header header;
	u64 mask;

	memset(&header, 0, sizeof(header));
	header.xfeatures = xsave->header.xfeatures;

	/* Mask out the feature bits depending on copy mode */
	switch (copy_mode) {
	case XSTATE_COPY_FP:
		header.xfeatures &= XFEATURE_MASK_FP;
		break;

	case XSTATE_COPY_FX:
		header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
		break;

	case XSTATE_COPY_XSAVE:
		header.xfeatures &= fpstate->user_xfeatures & xfeatures;
		break;
	}

	/* Copy FP state up to MXCSR */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
		     &xinit->i387, off_mxcsr);

	/* Copy MXCSR when SSE or YMM are set in the feature mask */
	copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
		     &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
		     MXCSR_AND_FLAGS_SIZE);

	/* Copy the remaining FP state */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP,
		     &to, &xsave->i387.st_space, &xinit->i387.st_space,
		     sizeof(xsave->i387.st_space));

	/* Copy the SSE state - shared with YMM, but independently managed */
	copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
		     &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
		     sizeof(xsave->i387.xmm_space));

	if (copy_mode != XSTATE_COPY_XSAVE)
		goto out;

	/* Zero the padding area */
	membuf_zero(&to, sizeof(xsave->i387.padding));

	/* Copy xsave->i387.sw_reserved */
	membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));

	/* Copy the user space relevant state of @xsave->header */
	membuf_write(&to, &header, sizeof(header));

	zerofrom = offsetof(struct xregs_state, extended_state_area);

	/*
	 * This 'mask' indicates which states to copy from fpstate.
	 * Those extended states that are not present in fpstate are
	 * either disabled or initialized:
	 *
	 * In non-compacted format, disabled features still occupy
	 * state space but there is no state to copy from in the
	 * compacted init_fpstate. The gap tracking will zero these
	 * states.
	 *
	 * The extended features have an all zeroes init state. Thus,
	 * remove them from 'mask' to zero those features in the user
	 * buffer instead of retrieving them from init_fpstate.
	 */
	mask = header.xfeatures;

	for_each_extended_xfeature_in_order(i, mask) {
		xfeature = xfeature_uncompact_order[i];
		/*
		 * If there was a feature or alignment gap, zero the space
		 * in the destination buffer.
		 */
		if (zerofrom < xstate_offsets[xfeature])
			membuf_zero(&to, xstate_offsets[xfeature] - zerofrom);

		if (xfeature == XFEATURE_PKRU) {
			struct pkru_state pkru = {0};
			/*
			 * PKRU is not necessarily up to date in the
			 * XSAVE buffer. Use the provided value.
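			 * The caller supplies the authoritative value (e.g.
			 * tsk->thread.pkru via copy_xstate_to_uabi_buf()).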
			 */
			pkru.pkru = pkru_val;
			membuf_write(&to, &pkru, sizeof(pkru));
		} else {
			membuf_write(&to,
				     __raw_xsave_addr(xsave, xfeature),
				     xstate_sizes[xfeature]);
		}
		/*
		 * Keep track of the last copied state in the non-compacted
		 * target buffer for gap zeroing.
		 */
		zerofrom = xstate_offsets[xfeature] + xstate_sizes[xfeature];
	}

out:
	if (to.left)
		membuf_zero(&to, to.left);
}

/**
 * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @tsk:	The task from which to copy the saved xstate
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * It supports partial copy but @to.pos always starts from zero.
 */
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
			     enum xstate_copy_mode copy_mode)
{
	__copy_xstate_to_uabi_buf(to, x86_task_fpu(tsk)->fpstate,
				  x86_task_fpu(tsk)->fpstate->user_xfeatures,
				  tsk->thread.pkru, copy_mode);
}

static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
			    const void *kbuf, const void __user *ubuf)
{
	if (kbuf) {
		memcpy(dst, kbuf + offset, size);
	} else {
		if (copy_from_user(dst, ubuf + offset, size))
			return -EFAULT;
	}
	return 0;
}


/**
 * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
 * @fpstate:	The fpstate buffer to copy to
 * @kbuf:	The UABI format buffer, if it comes from the kernel
 * @ubuf:	The UABI format buffer, if it comes from userspace
 * @pkru:	The location to write the PKRU value to
 *
 * Converts from the UABI format into the kernel internal hardware
 * dependent format.
 *
 * This function ultimately has three different callers with distinct PKRU
 * behavior.
 * 1.	When called from sigreturn the PKRU register will be restored from
 *	@fpstate via an XRSTOR. Correctly copying the UABI format buffer to
 *	@fpstate is sufficient to cover this case, but the caller will also
 *	pass a pointer to the thread_struct's pkru field in @pkru and updating
 *	it is harmless.
 * 2.	When called from ptrace the PKRU register will be restored from the
 *	thread_struct's pkru field. A pointer to that is passed in @pkru.
 *	The kernel will restore it manually, so the XRSTOR behavior that resets
 *	the PKRU register to the hardware init value (0) if the corresponding
 *	xfeatures bit is not set is emulated here.
 * 3.	When called from KVM the PKRU register will be restored from the vcpu's
 *	pkru field. A pointer to that is passed in @pkru. KVM hasn't used
 *	XRSTOR and hasn't had the PKRU resetting behavior described above. To
 *	preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
 *	bit is not set.
 */
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
			       const void __user *ubuf, u32 *pkru)
{
	struct xregs_state *xsave = &fpstate->regs.xsave;
	unsigned int offset, size;
	struct xstate_header hdr;
	u64 mask;
	int i;

	offset = offsetof(struct xregs_state, header);
	if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
		return -EFAULT;

	if (validate_user_xstate_header(&hdr, fpstate))
		return -EINVAL;

	/* Validate MXCSR when any of the related features is in use */
	mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
	if (hdr.xfeatures & mask) {
		u32 mxcsr[2];

		offset = offsetof(struct fxregs_state, mxcsr);
		if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
			return -EFAULT;

		/* Reserved bits in MXCSR must be zero. */
		if (mxcsr[0] & ~mxcsr_feature_mask)
			return -EINVAL;

		/* SSE and YMM require MXCSR even when FP is not in use. */
		if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
			xsave->i387.mxcsr = mxcsr[0];
			xsave->i387.mxcsr_mask = mxcsr[1];
		}
	}

	for (i = 0; i < XFEATURE_MAX; i++) {
		mask = BIT_ULL(i);

		if (hdr.xfeatures & mask) {
			void *dst = __raw_xsave_addr(xsave, i);

			offset = xstate_offsets[i];
			size = xstate_sizes[i];

			if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
				return -EFAULT;
		}
	}

	if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
		struct pkru_state *xpkru;

		xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
		*pkru = xpkru->pkru;
	} else {
		/*
		 * KVM may pass NULL here to indicate that it does not need
		 * PKRU updated.
		 */
		if (pkru)
			*pkru = 0;
	}

	/*
	 * The state that came in from userspace was user-state only.
	 * Mask all the user states out of 'xfeatures':
	 */
	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;

	/*
	 * Add back in the features that came in from userspace:
	 */
	xsave->header.xfeatures |= hdr.xfeatures;

	return 0;
}

/*
 * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
 * format and copy to the target thread. Used by ptrace and KVM.
 */
int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
{
	return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
}

/*
 * Convert from a sigreturn standard-format user-space buffer to kernel
 * XSAVE[S] format and copy to the target thread. This is called from the
 * sigreturn() and rt_sigreturn() system calls.
 */
int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
				      const void __user *ubuf)
{
	return copy_uabi_to_xstate(x86_task_fpu(tsk)->fpstate, NULL, ubuf, &tsk->thread.pkru);
}

static bool validate_independent_components(u64 mask)
{
	u64 xchk;

	if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
		return false;

	xchk = ~xfeatures_mask_independent();

	if (WARN_ON_ONCE(!mask || mask & xchk))
		return false;

	return true;
}

/**
 * xsaves - Save selected components to a kernel xstate buffer
 * @xstate:	Pointer to the buffer
 * @mask:	Feature mask to select the components to save
 *
 * The @xstate buffer must be 64-byte aligned and correctly initialized as
 * XSAVES does not write the full xstate header. Before first use the
 * buffer should be zeroed, otherwise a consecutive XRSTORS from that buffer
 * can #GP.
 *
 * The feature mask must be a subset of the independent features.
 */
void xsaves(struct xregs_state *xstate, u64 mask)
{
	int err;

	if (!validate_independent_components(mask))
		return;

	XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
	WARN_ON_ONCE(err);
}

/**
 * xrstors - Restore selected components from a kernel xstate buffer
 * @xstate:	Pointer to the buffer
 * @mask:	Feature mask to select the components to restore
 *
 * The @xstate buffer must be 64-byte aligned and correctly initialized,
 * otherwise XRSTORS from that buffer can #GP.
 *
 * Proper usage is to restore the state which was saved with
 * xsaves() into @xstate.
 *
 * The feature mask must be a subset of the independent features.
 */
void xrstors(struct xregs_state *xstate, u64 mask)
{
	int err;

	if (!validate_independent_components(mask))
		return;

	XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
	WARN_ON_ONCE(err);
}

#if IS_ENABLED(CONFIG_KVM)
void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature)
{
	void *addr = get_xsave_addr(&fpstate->regs.xsave, xfeature);

	if (addr)
		memset(addr, 0, xstate_sizes[xfeature]);
}
EXPORT_SYMBOL_FOR_KVM(fpstate_clear_xstate_component);
#endif

#ifdef CONFIG_X86_64

#ifdef CONFIG_X86_DEBUG_FPU
/*
 * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
 * can safely operate on the @fpstate buffer.
 */
static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
{
	u64 xfd = __this_cpu_read(xfd_state);

	if (fpstate->xfd == xfd)
		return true;

	/*
	 * The XFD MSR does not match fpstate->xfd. That's invalid when
	 * the passed in fpstate is current's fpstate.
	 */
	if (fpstate->xfd == x86_task_fpu(current)->fpstate->xfd)
		return false;

	/*
	 * XRSTOR(S) from init_fpstate are always correct as it will just
	 * bring all components into init state and not read from the
	 * buffer. XSAVE(S) raises #PF after init.
	 */
	if (fpstate == &init_fpstate)
		return rstor;

	/*
	 * XSAVE(S): clone(), fpu_swap_kvm_fpstate()
	 * XRSTOR(S): fpu_swap_kvm_fpstate()
	 */

	/*
	 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
	 * the buffer area for XFD-disabled state components.
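	 * Hence any component whose XFD bit is armed can be dropped from
	 * the mask being validated here.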
	 */
	mask &= ~xfd;

	/*
	 * Remove features which are valid in fpstate. They
	 * have space allocated in fpstate.
	 */
	mask &= ~fpstate->xfeatures;

	/*
	 * Any remaining state components in 'mask' might be written
	 * by XSAVE/XRSTOR. Fail validation if found.
	 */
	return !mask;
}

void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
{
	WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
}
#endif /* CONFIG_X86_DEBUG_FPU */

static int __init xfd_update_static_branch(void)
{
	/*
	 * If init_fpstate.xfd has bits set then dynamic features are
	 * available and the dynamic sizing must be enabled.
	 */
	if (init_fpstate.xfd)
		static_branch_enable(&__fpu_state_size_dynamic);
	return 0;
}
arch_initcall(xfd_update_static_branch)

void fpstate_free(struct fpu *fpu)
{
	if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
		vfree(fpu->fpstate);
}

/**
 * fpstate_realloc - Reallocate struct fpstate for the requested new features
 *
 * @xfeatures:	A bitmap of xstate features which extend the enabled features
 *		of that task
 * @ksize:	The required size for the kernel buffer
 * @usize:	The required size for user space buffers
 * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
 *
 * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
 * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
 * with large states are likely to live longer.
 *
 * Returns: 0 on success, -ENOMEM on allocation error.
 */
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
			   unsigned int usize, struct fpu_guest *guest_fpu)
{
	struct fpu *fpu = x86_task_fpu(current);
	struct fpstate *curfps, *newfps = NULL;
	unsigned int fpsize;
	bool in_use;

	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);

	newfps = vzalloc(fpsize);
	if (!newfps)
		return -ENOMEM;
	newfps->size = ksize;
	newfps->user_size = usize;
	newfps->is_valloc = true;

	/*
	 * When a guest FPU is supplied, use @guest_fpu->fpstate
	 * as the reference, independent of whether it is in use or not.
	 */
	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;

	/* Determine whether @curfps is the active fpstate */
	in_use = fpu->fpstate == curfps;

	if (guest_fpu) {
		newfps->is_guest = true;
		newfps->is_confidential = curfps->is_confidential;
		newfps->in_use = curfps->in_use;
		guest_fpu->xfeatures |= xfeatures;
		guest_fpu->uabi_size = usize;
	}

	fpregs_lock();
	/*
	 * If @curfps is in use, ensure that the current state is in the
	 * registers before swapping fpstate as that might invalidate it
	 * due to layout changes.
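	 * fpregs_restore_userregs() reloads the user register state from
	 * the old buffer while its layout is still valid.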
	 */
	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
		fpregs_restore_userregs();

	newfps->xfeatures = curfps->xfeatures | xfeatures;
	newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
	newfps->xfd = curfps->xfd & ~xfeatures;

	/* Do the final updates within the locked region */
	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);

	if (guest_fpu) {
		guest_fpu->fpstate = newfps;
		/* If curfps is active, update the FPU fpstate pointer */
		if (in_use)
			fpu->fpstate = newfps;
	} else {
		fpu->fpstate = newfps;
	}

	if (in_use)
		xfd_update_state(fpu->fpstate);
	fpregs_unlock();

	/* Only free valloc'ed state */
	if (curfps && curfps->is_valloc)
		vfree(curfps);

	return 0;
}

static int validate_sigaltstack(unsigned int usize)
{
	struct task_struct *thread, *leader = current->group_leader;
	unsigned long framesize = get_sigframe_size();

	lockdep_assert_held(&current->sighand->siglock);

	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
	framesize -= fpu_user_cfg.max_size;
	framesize += usize;
	for_each_thread(leader, thread) {
		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
			return -ENOSPC;
	}
	return 0;
}

static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
	/*
	 * This deliberately does not exclude !XSAVES as we still might
	 * decide to optionally context switch XCR0 or talk the silicon
	 * vendors into extending XFD for the pre-AMX states, especially
	 * AVX-512.
	 */
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	struct fpu *fpu = x86_task_fpu(current->group_leader);
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	u64 mask;
	int ret = 0;

	/* Check whether fully enabled */
	if ((permitted & requested) == requested)
		return 0;

	/*
	 * Calculate the resulting kernel state size. Note, @permitted also
	 * contains supervisor xfeatures even though supervisor xfeatures
	 * are always permitted for kernel and guest FPUs, and never
	 * permitted for user FPUs.
	 */
	mask = permitted | requested;
	ksize = xstate_calculate_size(mask, compacted);

	/*
	 * Calculate the resulting user state size. Take care not to clobber
	 * the supervisor xfeatures in the new mask!
	 */
	usize = xstate_calculate_size(mask & XFEATURE_MASK_USER_SUPPORTED, false);

	if (!guest) {
		ret = validate_sigaltstack(usize);
		if (ret)
			return ret;
	}

	perm = guest ? &fpu->guest_perm : &fpu->perm;
	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
	WRITE_ONCE(perm->__state_perm, mask);
	/* Protected by sighand lock */
	perm->__state_size = ksize;
	perm->__user_state_size = usize;
	return ret;
}

/*
 * Permissions array to map facilities with more than one component
 */
static const u64 xstate_prctl_req[XFEATURE_MAX] = {
	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};

static int xstate_request_perm(unsigned long idx, bool guest)
{
	u64 permitted, requested;
	int ret;

	if (idx >= XFEATURE_MAX)
		return -EINVAL;

	/*
	 * Look up the facility mask which can require more than
	 * one xstate component.
	 */
	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
	requested = xstate_prctl_req[idx];
	if (!requested)
		return -EOPNOTSUPP;

	if ((fpu_user_cfg.max_features & requested) != requested)
		return -EOPNOTSUPP;

	/* Lockless quick check */
	permitted = xstate_get_group_perm(guest);
	if ((permitted & requested) == requested)
		return 0;

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);
	permitted = xstate_get_group_perm(guest);

	/* First vCPU allocation locks the permissions. */
	if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
		ret = -EBUSY;
	else
		ret = __xstate_request_perm(permitted, requested, guest);
	spin_unlock_irq(&current->sighand->siglock);
	return ret;
}

int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	struct fpu *fpu;

	if (!xfd_event) {
		if (!guest_fpu)
			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
		return 0;
	}

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);

	/* If not permitted let it die */
	if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
		spin_unlock_irq(&current->sighand->siglock);
		return -EPERM;
	}

	fpu = x86_task_fpu(current->group_leader);
	perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
	ksize = perm->__state_size;
	usize = perm->__user_state_size;

	/*
	 * The feature is permitted. State size is sufficient. Dropping
	 * the lock is safe here; even if more features are added from
	 * another task, the retrieved buffer sizes are valid for the
	 * currently requested feature(s).
	 */
	spin_unlock_irq(&current->sighand->siglock);

	/*
	 * Try to allocate a new fpstate. If that fails there is no way
	 * out.
	 */
	if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
		return -EFAULT;
	return 0;
}

int xfd_enable_feature(u64 xfd_err)
{
	return __xfd_enable_feature(xfd_err, NULL);
}

#else /* CONFIG_X86_64 */
static inline int xstate_request_perm(unsigned long idx, bool guest)
{
	return -EPERM;
}
#endif /* !CONFIG_X86_64 */

u64 xstate_get_guest_group_perm(void)
{
	return xstate_get_group_perm(true);
}
EXPORT_SYMBOL_FOR_KVM(xstate_get_guest_group_perm);

/**
 * fpu_xstate_prctl - xstate permission operations
 * @option:	A subfunction of arch_prctl()
 * @arg2:	option argument
 * Return:	0 if successful; otherwise, an error code
 *
 * Option arguments:
 *
 * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
 * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
 * ARCH_REQ_XCOMP_PERM: Facility number requested
 *
 * For facilities which require more than one XSTATE component, the request
 * must be the highest state component number related to that facility,
 * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
 * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
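 *
 * Illustrative userspace sketch (not part of this file; assumes the uapi
 * ARCH_* constants from <asm/prctl.h> and that AMX tile data is xfeature 18):
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	// Request permission to use AMX tile data before touching tile regs:
 *	if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, 18))
 *		perror("ARCH_REQ_XCOMP_PERM");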
 */
long fpu_xstate_prctl(int option, unsigned long arg2)
{
	u64 __user *uptr = (u64 __user *)arg2;
	u64 permitted, supported;
	unsigned long idx = arg2;
	bool guest = false;

	switch (option) {
	case ARCH_GET_XCOMP_SUPP:
		supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
		return put_user(supported, uptr);

	case ARCH_GET_XCOMP_PERM:
		/*
		 * Lockless snapshot as it can also change right after
		 * dropping the lock.
		 */
		permitted = xstate_get_host_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_GET_XCOMP_GUEST_PERM:
		permitted = xstate_get_guest_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_REQ_XCOMP_GUEST_PERM:
		guest = true;
		fallthrough;

	case ARCH_REQ_XCOMP_PERM:
		if (!IS_ENABLED(CONFIG_X86_64))
			return -EOPNOTSUPP;

		return xstate_request_perm(idx, guest);

	default:
		return -EINVAL;
	}
}

#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
 * Report the amount of time elapsed in milliseconds since the last AVX-512
 * use in the task. Report -1 if there has been no AVX-512 usage.
 */
static void avx512_status(struct seq_file *m, struct task_struct *task)
{
	unsigned long timestamp;
	long delta = -1;

	/* AVX-512 usage is not tracked for kernel threads. Don't report anything. */
	if (task->flags & (PF_KTHREAD | PF_USER_WORKER))
		return;

	timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp);

	if (timestamp) {
		delta = (long)(jiffies - timestamp);
		/*
		 * Cap to LONG_MAX if time difference > LONG_MAX
		 */
		if (delta < 0)
			delta = LONG_MAX;
		delta = jiffies_to_msecs(delta);
	}

	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
	seq_putc(m, '\n');
}

/*
 * Report architecture specific information
 */
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
			 struct pid *pid, struct task_struct *task)
{
	/*
	 * Report AVX-512 state if the processor and the build option
	 * support it.
	 */
	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
		avx512_status(m, task);

	return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */

#ifdef CONFIG_COREDUMP
static const char owner_name[] = "LINUX";

/*
 * Dump type, size, offset and flag values for every xfeature that is present.
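 * Each record is a struct x86_xfeat_component; the records form the
 * payload of the NT_X86_XSAVE_LAYOUT core dump note emitted below.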
 */
static int dump_xsave_layout_desc(struct coredump_params *cprm)
{
	int num_records = 0;
	int i;

	for_each_extended_xfeature(i, fpu_user_cfg.max_features) {
		struct x86_xfeat_component xc = {
			.type   = i,
			.size   = xstate_sizes[i],
			.offset = xstate_offsets[i],
			/* reserved for future use */
			.flags  = 0,
		};

		if (!dump_emit(cprm, &xc, sizeof(xc)))
			return 0;

		num_records++;
	}
	return num_records;
}

static u32 get_xsave_desc_size(void)
{
	u32 cnt = 0;
	u32 i;

	for_each_extended_xfeature(i, fpu_user_cfg.max_features)
		cnt++;

	return cnt * (sizeof(struct x86_xfeat_component));
}

int elf_coredump_extra_notes_write(struct coredump_params *cprm)
{
	int num_records = 0;
	struct elf_note en;

	if (!fpu_user_cfg.max_features)
		return 0;

	en.n_namesz = sizeof(owner_name);
	en.n_descsz = get_xsave_desc_size();
	en.n_type = NT_X86_XSAVE_LAYOUT;

	if (!dump_emit(cprm, &en, sizeof(en)))
		return 1;
	if (!dump_emit(cprm, owner_name, en.n_namesz))
		return 1;
	if (!dump_align(cprm, 4))
		return 1;

	num_records = dump_xsave_layout_desc(cprm);
	if (!num_records)
		return 1;

	/* Total size should equal the number of records times the record size */
	if ((sizeof(struct x86_xfeat_component) * num_records) != en.n_descsz)
		return 1;

	return 0;
}

int elf_coredump_extra_notes_size(void)
{
	int size;

	if (!fpu_user_cfg.max_features)
		return 0;

	/* .note header */
	size  = sizeof(struct elf_note);
	/* Name plus alignment to 4 bytes */
	size += roundup(sizeof(owner_name), 4);
	size += get_xsave_desc_size();

	return size;
}
#endif /* CONFIG_COREDUMP */