// SPDX-License-Identifier: GPL-2.0-only
/*
 * xsave/xrstor support.
 *
 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
 */
#include <linux/bitops.h>
#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/nospec.h>
#include <linux/pkeys.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/coredump.h>
#include <linux/sort.h>

#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/xcr.h>

#include <asm/cpuid/api.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/prctl.h>
#include <asm/elf.h>

#include <uapi/asm/elf.h>

#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"

#define for_each_extended_xfeature(bit, mask)				\
	(bit) = FIRST_EXTENDED_XFEATURE;				\
	for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))

/*
 * Although we spell it out in here, the Processor Trace
 * xfeature is completely unused. We use other mechanisms
 * to save/restore PT state in Linux.
 */
static const char *xfeature_names[] =
{
	"x87 floating point registers",
	"SSE registers",
	"AVX registers",
	"MPX bounds registers",
	"MPX CSR",
	"AVX-512 opmask",
	"AVX-512 Hi256",
	"AVX-512 ZMM_Hi256",
	"Processor Trace (unused)",
	"Protection Keys User registers",
	"PASID state",
	"Control-flow User registers",
	"Control-flow Kernel registers (KVM only)",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"AMX Tile config",
	"AMX Tile data",
	"APX registers",
	"unknown xstate feature",
};

static unsigned short xsave_cpuid_features[] __initdata = {
	[XFEATURE_FP]				= X86_FEATURE_FPU,
	[XFEATURE_SSE]				= X86_FEATURE_XMM,
	[XFEATURE_YMM]				= X86_FEATURE_AVX,
	[XFEATURE_BNDREGS]			= X86_FEATURE_MPX,
	[XFEATURE_BNDCSR]			= X86_FEATURE_MPX,
	[XFEATURE_OPMASK]			= X86_FEATURE_AVX512F,
	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
	[XFEATURE_PKRU]				= X86_FEATURE_OSPKE,
	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
	[XFEATURE_CET_USER]			= X86_FEATURE_SHSTK,
	[XFEATURE_CET_KERNEL]			= X86_FEATURE_SHSTK,
	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
	[XFEATURE_APX]				= X86_FEATURE_APX,
};

static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;

/*
 * Ordering of xstate components in uncompacted format: The xfeature
 * number does not necessarily indicate its position in the XSAVE buffer.
 * This array defines the traversal order of xstate features.
 */
static unsigned int xfeature_uncompact_order[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
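
/*
 * Illustrative example (hypothetical enumeration, not a statement about
 * any particular CPU): if XFEATURE_PKRU (9) were enumerated at a lower
 * non-compacted offset than XFEATURE_Hi16_ZMM (7), the sorted array
 * would contain ..., 9, 7, ... so that traversal follows the buffer
 * layout rather than the xfeature numbering.
 */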

static inline unsigned int next_xfeature_order(unsigned int i, u64 mask)
{
	for (; xfeature_uncompact_order[i] != -1; i++) {
		if (mask & BIT_ULL(xfeature_uncompact_order[i]))
			break;
	}

	return i;
}

/* Iterate xstate features in uncompacted order: */
#define for_each_extended_xfeature_in_order(i, mask)	\
	for (i = 0;					\
	     i = next_xfeature_order(i, mask),		\
	     xfeature_uncompact_order[i] != -1;		\
	     i++)

#define XSTATE_FLAG_SUPERVISOR	BIT(0)
#define XSTATE_FLAG_ALIGNED64	BIT(1)

/*
 * Return whether the system supports a given xfeature.
 *
 * Also return the name of the (most advanced) feature that the caller requested:
 */
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
	u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;

	if (unlikely(feature_name)) {
		long xfeature_idx, max_idx;
		u64 xfeatures_print;
		/*
		 * We use fls64() here so that we can print the most advanced
		 * feature that was requested but is missing. So if a driver
		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
		 * missing AVX feature - this is the most informative message
		 * to users:
		 */
		if (xfeatures_missing)
			xfeatures_print = xfeatures_missing;
		else
			xfeatures_print = xfeatures_needed;

		xfeature_idx = fls64(xfeatures_print)-1;
		max_idx = ARRAY_SIZE(xfeature_names)-1;
		xfeature_idx = min(xfeature_idx, max_idx);

		*feature_name = xfeature_names[xfeature_idx];
	}

	if (xfeatures_missing)
		return 0;

	return 1;
}
EXPORT_SYMBOL_GPL(cpu_has_xfeatures);

static bool xfeature_is_aligned64(int xfeature_nr)
{
	return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
}

static bool xfeature_is_supervisor(int xfeature_nr)
{
	return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
}

static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
{
	unsigned int offs, i;

	/*
	 * Non-compacted format and legacy features use the cached fixed
	 * offsets.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
	    xfeature <= XFEATURE_SSE)
		return xstate_offsets[xfeature];

	/*
	 * Compacted format offsets depend on the actual content of the
	 * compacted xsave area which is determined by the xcomp_bv header
	 * field.
	 */
	offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	for_each_extended_xfeature(i, xcomp_bv) {
		if (xfeature_is_aligned64(i))
			offs = ALIGN(offs, 64);
		if (i == xfeature)
			break;
		offs += xstate_sizes[i];
	}
	return offs;
}
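
/*
 * Worked example for the compacted branch above (component sizes are CPU
 * enumerated, so the numbers are illustrative): with xcomp_bv =
 * FP | SSE | YMM | XTILE_DATA, YMM is the first extended component and
 * starts right behind the legacy area plus header at
 * FXSAVE_SIZE + XSAVE_HDR_SIZE = 512 + 64 = 576. XTILE_DATA is a 64-byte
 * aligned component, so it starts at
 * ALIGN(576 + xstate_sizes[XFEATURE_YMM], 64).
 */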

/*
 * Enable the extended processor state save/restore feature.
 * Called once per CPU onlining.
 */
void fpu__init_cpu_xstate(void)
{
	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
		return;

	cr4_set_bits(X86_CR4_OSXSAVE);

	/*
	 * Must happen after CR4 setup and before xsetbv() to allow KVM
	 * lazy passthrough. Write independent of the dynamic state static
	 * key as that does not work on the boot CPU. This also ensures
	 * that any stale state is wiped out from XFD. Reset the per CPU
	 * xfd cache too.
	 */
	if (cpu_feature_enabled(X86_FEATURE_XFD))
		xfd_set_state(init_fpstate.xfd);

	/*
	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
	 * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
	 * states can be set here.
	 */
	xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);

	/*
	 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
	 */
	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
		wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() |
				     xfeatures_mask_independent());
	}
}

static bool xfeature_enabled(enum xfeature xfeature)
{
	return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
}

static int compare_xstate_offsets(const void *xfeature1, const void *xfeature2)
{
	return xstate_offsets[*(unsigned int *)xfeature1] -
	       xstate_offsets[*(unsigned int *)xfeature2];
}

/*
 * Record the offsets and sizes of various xstates contained
 * in the XSAVE state memory layout. Also, create an ordered
 * list of xfeatures for handling out-of-order offsets.
 */
static void __init setup_xstate_cache(void)
{
	u32 eax, ebx, ecx, edx, xfeature, i = 0;
	/*
	 * The FP xstates and SSE xstates are legacy states. They are always
	 * in the fixed offsets in the xsave area in either compacted form
	 * or standard form.
	 */
	xstate_offsets[XFEATURE_FP]	= 0;
	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
						   xmm_space);

	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
						       xmm_space);

	for_each_extended_xfeature(xfeature, fpu_kernel_cfg.max_features) {
		cpuid_count(CPUID_LEAF_XSTATE, xfeature, &eax, &ebx, &ecx, &edx);

		xstate_sizes[xfeature] = eax;
		xstate_flags[xfeature] = ecx;

		/*
		 * If an xfeature is supervisor state, the offset in EBX is
		 * invalid, leave it at -1.
		 */
		if (xfeature_is_supervisor(xfeature))
			continue;

		xstate_offsets[xfeature] = ebx;

		/* Populate the list of xfeatures before sorting */
		xfeature_uncompact_order[i++] = xfeature;
	}

	/*
	 * Sort xfeatures by their offsets to support out-of-order
	 * offsets in the uncompacted format.
	 */
	sort(xfeature_uncompact_order, i, sizeof(unsigned int), compare_xstate_offsets, NULL);
}
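
/*
 * For reference, the CPUID leaf 0xD enumeration consumed above is (per
 * the SDM) for each extended sub-leaf N >= 2: EAX is the size of
 * component N in bytes, EBX its offset in the non-compacted layout
 * (valid for user states only), and ECX carries the flags cached in
 * xstate_flags[]: bit 0 denotes a supervisor component, bit 1 a
 * component that is 64-byte aligned in the compacted layout.
 */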

/*
 * Print out all the supported xstate features:
 */
static void __init print_xstate_features(void)
{
	int i;

	for (i = 0; i < XFEATURE_MAX; i++) {
		u64 mask = BIT_ULL(i);
		const char *name;

		if (cpu_has_xfeatures(mask, &name))
			pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", mask, name);
	}
}

/*
 * This check is important because it is easy to get XSTATE_*
 * confused with XSTATE_BIT_*.
 */
#define CHECK_XFEATURE(nr) do {			\
	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
	WARN_ON(nr >= XFEATURE_MAX);		\
} while (0)

/*
 * Print out xstate component offsets and sizes
 */
static void __init print_xstate_offset_size(void)
{
	int i;

	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
			i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
			i, xstate_sizes[i]);
	}
}

/*
 * This function is called only during boot time when x86 caps are not set
 * up and alternatives cannot be used yet.
 */
static __init void os_xrstor_booting(struct xregs_state *xstate)
{
	u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
	u32 lmask = mask;
	u32 hmask = mask >> 32;
	int err;

	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
	else
		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);

	/*
	 * We should never fault when copying from a kernel buffer, and the FPU
	 * state we set at boot time should be valid.
	 */
	WARN_ON_FPU(err);
}

/*
 * All supported features have either init state all zeros or are
 * handled in setup_init_fpu_buf() individually. This is an explicit
 * feature list and does not use XFEATURE_MASK*SUPPORTED, so that
 * newly added supported features trip the build time check below and
 * make people actually look at the init state for the new feature.
 */
#define XFEATURES_INIT_FPSTATE_HANDLED		\
	(XFEATURE_MASK_FP |			\
	 XFEATURE_MASK_SSE |			\
	 XFEATURE_MASK_YMM |			\
	 XFEATURE_MASK_OPMASK |			\
	 XFEATURE_MASK_ZMM_Hi256 |		\
	 XFEATURE_MASK_Hi16_ZMM |		\
	 XFEATURE_MASK_PKRU |			\
	 XFEATURE_MASK_BNDREGS |		\
	 XFEATURE_MASK_BNDCSR |			\
	 XFEATURE_MASK_PASID |			\
	 XFEATURE_MASK_CET_USER |		\
	 XFEATURE_MASK_CET_KERNEL |		\
	 XFEATURE_MASK_XTILE |			\
	 XFEATURE_MASK_APX)

/*
 * setup the xstate image representing the init state
 */
static void __init setup_init_fpu_buf(void)
{
	BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
		      XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
		     XFEATURES_INIT_FPSTATE_HANDLED);

	if (!boot_cpu_has(X86_FEATURE_XSAVE))
		return;

	print_xstate_features();

	xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);

	/*
	 * Init all the features state with header.xfeatures being 0x0
	 */
	os_xrstor_booting(&init_fpstate.regs.xsave);

	/*
	 * All components are now in init state. Read the state back so
	 * that init_fpstate contains all non-zero init state. This only
	 * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
	 * those use the init optimization which skips writing data for
	 * components in init state.
	 *
	 * XSAVE could be used, but that would require to reshuffle the
	 * data when XSAVEC/S is available because XSAVEC/S uses xstate
	 * compaction. But doing so is a pointless exercise because most
	 * components have an all zeros init state except for the legacy
	 * ones (FP and SSE). Those can be saved with FXSAVE into the
	 * legacy area. Adding new features requires to ensure that init
	 * state is all zeroes or if not to add the necessary handling
	 * here.
	 */
	fxsave(&init_fpstate.regs.fxsave);
}

int xfeature_size(int xfeature_nr)
{
	u32 eax, ebx, ecx, edx;

	CHECK_XFEATURE(xfeature_nr);
	cpuid_count(CPUID_LEAF_XSTATE, xfeature_nr, &eax, &ebx, &ecx, &edx);
	return eax;
}

/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
static int validate_user_xstate_header(const struct xstate_header *hdr,
				       struct fpstate *fpstate)
{
	/* No unknown or supervisor features may be set */
	if (hdr->xfeatures & ~fpstate->user_xfeatures)
		return -EINVAL;

	/* Userspace must use the uncompacted format */
	if (hdr->xcomp_bv)
		return -EINVAL;

	/*
	 * If 'reserved' is shrunk to add a new field, make sure to validate
	 * that new field here!
	 */
	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);

	/* No reserved bits may be set */
	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
		return -EINVAL;

	return 0;
}

static void __init __xstate_dump_leaves(void)
{
	int i;
	u32 eax, ebx, ecx, edx;
	static int should_dump = 1;

	if (!should_dump)
		return;
	should_dump = 0;
	/*
	 * Dump out a few leaves past the ones that we support
	 * just in case there are some goodies up there
	 */
	for (i = 0; i < XFEATURE_MAX + 10; i++) {
		cpuid_count(CPUID_LEAF_XSTATE, i, &eax, &ebx, &ecx, &edx);
		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
			CPUID_LEAF_XSTATE, i, eax, ebx, ecx, edx);
	}
}

#define XSTATE_WARN_ON(x, fmt, ...) do {					\
	if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) {	\
		__xstate_dump_leaves();						\
	}									\
} while (0)

#define XCHECK_SZ(sz, nr, __struct) ({					\
	if (WARN_ONCE(sz != sizeof(__struct),				\
	    "[%s]: struct is %zu bytes, cpu state %d bytes\n",		\
	    xfeature_names[nr], sizeof(__struct), sz)) {		\
		__xstate_dump_leaves();					\
	}								\
	true;								\
})

/**
 * check_xtile_data_against_struct - Check tile data state size.
 * @size: The tile data state size
 *
 * Calculate the state size by multiplying the single tile size which is
 * recorded in a C struct, and the number of tiles that the CPU enumerates.
 * Compare the provided size with the calculation.
 *
 * Returns: 0 on success, -EINVAL on mismatch.
 */
static int __init check_xtile_data_against_struct(int size)
{
	u32 max_palid, palid, state_size;
	u32 eax, ebx, ecx, edx;
	u16 max_tile;

	/*
	 * Check the maximum palette id:
	 *   eax: the highest numbered palette subleaf.
	 */
	cpuid_count(CPUID_LEAF_TILE, 0, &max_palid, &ebx, &ecx, &edx);

	/*
	 * Cross-check each tile size and find the maximum number of
	 * supported tiles.
	 */
	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
		u16 tile_size, max;

		/*
		 * Check the tile size info:
		 *   eax[31:16]: bytes per tile
		 *   ebx[31:16]: the max names (or max number of tiles)
		 */
		cpuid_count(CPUID_LEAF_TILE, palid, &eax, &ebx, &ecx, &edx);
		tile_size = eax >> 16;
		max = ebx >> 16;

		if (tile_size != sizeof(struct xtile_data)) {
			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
			       __stringify(XFEATURE_XTILE_DATA),
			       sizeof(struct xtile_data), tile_size);
			__xstate_dump_leaves();
			return -EINVAL;
		}

		if (max > max_tile)
			max_tile = max;
	}

	state_size = sizeof(struct xtile_data) * max_tile;
	if (size != state_size) {
		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
		__xstate_dump_leaves();
		return -EINVAL;
	}
	return 0;
}
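
/*
 * Illustrative numbers (palette 1 on current AMX implementations): a
 * tile is 1024 bytes and 8 tiles are supported, so the expected
 * XTILE_DATA state size is 8 * sizeof(struct xtile_data) == 8192 bytes,
 * which is exactly the cross-check performed above.
 */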

/*
 * We have a C struct for each 'xstate'. We need to ensure
 * that our software representation matches what the CPU
 * tells us about the state's size.
 */
static bool __init check_xstate_against_struct(int nr)
{
	/*
	 * Ask the CPU for the size of the state.
	 */
	int sz = xfeature_size(nr);

	/*
	 * Match each CPU state with the corresponding software
	 * structure.
	 */
	switch (nr) {
	case XFEATURE_YMM:	  return XCHECK_SZ(sz, nr, struct ymmh_struct);
	case XFEATURE_BNDREGS:	  return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
	case XFEATURE_BNDCSR:	  return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
	case XFEATURE_OPMASK:	  return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
	case XFEATURE_ZMM_Hi256:  return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
	case XFEATURE_Hi16_ZMM:	  return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
	case XFEATURE_PKRU:	  return XCHECK_SZ(sz, nr, struct pkru_state);
	case XFEATURE_PASID:	  return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
	case XFEATURE_XTILE_CFG:  return XCHECK_SZ(sz, nr, struct xtile_cfg);
	case XFEATURE_CET_USER:	  return XCHECK_SZ(sz, nr, struct cet_user_state);
	case XFEATURE_CET_KERNEL: return XCHECK_SZ(sz, nr, struct cet_supervisor_state);
	case XFEATURE_APX:	  return XCHECK_SZ(sz, nr, struct apx_state);
	case XFEATURE_XTILE_DATA: return !check_xtile_data_against_struct(sz);
	default:
		XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
		return false;
	}
}

static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
{
	unsigned int topmost = fls64(xfeatures) - 1;
	unsigned int offset, i;

	if (topmost <= XFEATURE_SSE)
		return sizeof(struct xregs_state);

	if (compacted) {
		offset = xfeature_get_offset(xfeatures, topmost);
	} else {
		/* Walk through the xfeature order to pick the last */
		for_each_extended_xfeature_in_order(i, xfeatures)
			topmost = xfeature_uncompact_order[i];
		offset = xstate_offsets[topmost];
	}

	return offset + xstate_sizes[topmost];
}
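
/*
 * Worked example (offsets and sizes are CPU enumerated; these numbers
 * are typical, not guaranteed): in the non-compacted format with
 * AVX-512 enabled, Hi16_ZMM is commonly the highest component, at
 * offset 1664 with a size of 1024 bytes, so the calculated size is
 * 1664 + 1024 = 2688 bytes. The compacted size is derived the same
 * way, just with compacted offsets.
 */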

/*
 * This essentially double-checks what the cpu told us about
 * how large the XSAVE buffer needs to be. We are recalculating
 * it to be safe.
 *
 * Independent XSAVE features allocate their own buffers and are not
 * covered by these checks. Only the size of the buffer for task->fpu
 * is checked here.
 */
static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
{
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	int i;

	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
		if (!check_xstate_against_struct(i))
			return false;
		/*
		 * Supervisor state components can be managed only by
		 * XSAVES.
		 */
		if (!xsaves && xfeature_is_supervisor(i)) {
			XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
			return false;
		}
	}
	size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
	XSTATE_WARN_ON(size != kernel_size,
		       "size %u != kernel_size %u\n", size, kernel_size);
	return size == kernel_size;
}

/*
 * Get total size of enabled xstates in XCR0 | IA32_XSS.
 *
 * Note the SDM's wording here. "sub-function 0" only enumerates
 * the size of the *user* states. If we use it to size a buffer
 * that we use 'XSAVES' on, we could potentially overflow the
 * buffer because 'XSAVES' saves system states too.
 *
 * This also takes compaction into account. So this works for
 * XSAVEC as well.
 */
static unsigned int __init get_compacted_size(void)
{
	unsigned int eax, ebx, ecx, edx;
	/*
	 * - CPUID function 0DH, sub-function 1:
	 *    EBX enumerates the size (in bytes) required by
	 *    the XSAVES instruction for an XSAVE area
	 *    containing all the state components
	 *    corresponding to bits currently set in
	 *    XCR0 | IA32_XSS.
	 *
	 * When XSAVES is not available but XSAVEC is (virt), then there
	 * are no supervisor states, but XSAVEC still uses compacted
	 * format.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
	return ebx;
}

/*
 * Get the total size of the enabled xstates without the independent supervisor
 * features.
 */
static unsigned int __init get_xsave_compacted_size(void)
{
	u64 mask = xfeatures_mask_independent();
	unsigned int size;

	if (!mask)
		return get_compacted_size();

	/* Disable independent features. */
	wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor());

	/*
	 * Ask the hardware what size is required of the buffer.
	 * This is the size required for the task->fpu buffer.
	 */
	size = get_compacted_size();

	/* Re-enable independent features so XSAVES will work on them again. */
	wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);

	return size;
}

static unsigned int __init get_xsave_size_user(void)
{
	unsigned int eax, ebx, ecx, edx;
	/*
	 * - CPUID function 0DH, sub-function 0:
	 *    EBX enumerates the size (in bytes) required by
	 *    the XSAVE instruction for an XSAVE area
	 *    containing all the *user* state components
	 *    corresponding to bits currently set in XCR0.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
	return ebx;
}

static int __init init_xstate_size(void)
{
	/* Recompute the context size for enabled features: */
	unsigned int user_size, kernel_size, kernel_default_size;
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);

	/* Uncompacted user space size */
	user_size = get_xsave_size_user();

	/*
	 * XSAVES kernel size includes supervisor states and uses compacted
	 * format. XSAVEC uses compacted format, but does not save
	 * supervisor states.
	 *
	 * XSAVE[OPT] do not support supervisor states so kernel and user
	 * size is identical.
	 */
	if (compacted)
		kernel_size = get_xsave_compacted_size();
	else
		kernel_size = user_size;

	kernel_default_size =
		xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);

	if (!paranoid_xstate_size_valid(kernel_size))
		return -EINVAL;

	fpu_kernel_cfg.max_size = kernel_size;
	fpu_user_cfg.max_size = user_size;

	fpu_kernel_cfg.default_size = kernel_default_size;
	fpu_user_cfg.default_size =
		xstate_calculate_size(fpu_user_cfg.default_features, false);

	guest_default_cfg.size =
		xstate_calculate_size(guest_default_cfg.features, compacted);

	return 0;
}

/*
 * We enabled the XSAVE hardware, but something went wrong and
 * we cannot use it. Disable it.
 */
static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
{
	pr_info("x86/fpu: XSAVE disabled\n");

	fpu_kernel_cfg.max_features = 0;
	cr4_clear_bits(X86_CR4_OSXSAVE);
	setup_clear_cpu_cap(X86_FEATURE_XSAVE);

	/* Restore the legacy size. */
	fpu_kernel_cfg.max_size = legacy_size;
	fpu_kernel_cfg.default_size = legacy_size;
	fpu_user_cfg.max_size = legacy_size;
	fpu_user_cfg.default_size = legacy_size;
	guest_default_cfg.size = legacy_size;

	/*
	 * Prevent enabling the static branch which enables writes to the
	 * XFD MSR.
	 */
	init_fpstate.xfd = 0;

	fpstate_reset(x86_task_fpu(current));
}

static u64 __init host_default_mask(void)
{
	/*
	 * Exclude dynamic features (require userspace opt-in) and features
	 * that are supported only for KVM guests.
	 */
	return ~((u64)XFEATURE_MASK_USER_DYNAMIC | XFEATURE_MASK_GUEST_SUPERVISOR);
}

static u64 __init guest_default_mask(void)
{
	/*
	 * Exclude dynamic features, which require userspace opt-in even
	 * for KVM guests.
	 */
	return ~(u64)XFEATURE_MASK_USER_DYNAMIC;
}

/*
 * Enable and initialize the xsave feature.
 * Called once per system bootup.
 */
void __init fpu__init_system_xstate(unsigned int legacy_size)
{
	unsigned int eax, ebx, ecx, edx;
	u64 xfeatures;
	int err;
	int i;

	if (!boot_cpu_has(X86_FEATURE_FPU)) {
		pr_info("x86/fpu: No FPU detected\n");
		return;
	}

	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
		pr_info("x86/fpu: x87 FPU will use %s\n",
			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
		return;
	}

	/*
	 * Find user xstates supported by the processor.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);

	/*
	 * Find supervisor xstates supported by the processor.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);

	if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		/*
		 * This indicates that something really unexpected happened
		 * with the enumeration. Disable XSAVE and try to continue
		 * booting without it. This is too early to BUG().
		 */
		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	if (fpu_kernel_cfg.max_features & XFEATURE_MASK_APX &&
	    fpu_kernel_cfg.max_features & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) {
		/*
		 * This is a problematic CPU configuration where two
		 * conflicting state components are both enumerated.
		 */
		pr_err("x86/fpu: Both APX/MPX present in the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
					      XFEATURE_MASK_INDEPENDENT;

	/*
	 * Clear XSAVE features that are disabled in the normal CPUID.
	 */
	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
		unsigned short cid = xsave_cpuid_features[i];

		/* Careful: X86_FEATURE_FPU is 0! */
		if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
			fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
	}

	if (!cpu_feature_enabled(X86_FEATURE_XFD))
		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
	else
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
					       XFEATURE_MASK_SUPERVISOR_SUPPORTED;

	fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
	fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;

	/*
	 * Now, given maximum feature set, determine default values by
	 * applying default masks.
	 */
	fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features & host_default_mask();
	fpu_user_cfg.default_features = fpu_user_cfg.max_features & host_default_mask();
	guest_default_cfg.features = fpu_kernel_cfg.max_features & guest_default_mask();

	/* Store it for paranoia check at the end */
	xfeatures = fpu_kernel_cfg.max_features;

	/*
	 * Initialize the default XFD state in init_fpstate and enable the
	 * dynamic sizing mechanism if dynamic states are available. The
	 * static key cannot be enabled here because this runs before
	 * jump_label_init(). This is delayed to an initcall.
	 */
	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;

	/* Set up compaction feature bit */
	if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
	    cpu_feature_enabled(X86_FEATURE_XSAVES))
		setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);

	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();

	/* Cache size, offset and flags for initialization */
	setup_xstate_cache();

	err = init_xstate_size();
	if (err)
		goto out_disable;

	/*
	 * Update info used for ptrace frames; use standard-format size and no
	 * supervisor xstates:
	 */
	update_regset_xstate_info(fpu_user_cfg.max_size,
				  fpu_user_cfg.max_features);

	/*
	 * init_fpstate excludes dynamic states as they are large but init
	 * state is zero.
	 */
	init_fpstate.size = fpu_kernel_cfg.default_size;
	init_fpstate.xfeatures = fpu_kernel_cfg.default_features;

	if (init_fpstate.size > sizeof(init_fpstate.regs)) {
		pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d)\n",
			sizeof(init_fpstate.regs), init_fpstate.size);
		goto out_disable;
	}

	setup_init_fpu_buf();

	/*
	 * Paranoia check whether something in the setup modified the
	 * xfeatures mask.
	 */
	if (xfeatures != fpu_kernel_cfg.max_features) {
		pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init\n",
		       xfeatures, fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/*
	 * CPU capabilities initialization runs before FPU init. So
	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
	 * functional, set the feature bit so depending code works.
	 */
	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);

	print_xstate_offset_size();
	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
		fpu_kernel_cfg.max_features,
		fpu_kernel_cfg.max_size,
		boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
	return;

out_disable:
	/* something went wrong, try to boot without any XSAVE support */
	fpu__init_disable_system_xstate(legacy_size);
}
"compacted" : "standard"); 961 return; 962 963 out_disable: 964 /* something went wrong, try to boot without any XSAVE support */ 965 fpu__init_disable_system_xstate(legacy_size); 966 } 967 968 /* 969 * Restore minimal FPU state after suspend: 970 */ 971 void fpu__resume_cpu(void) 972 { 973 /* 974 * Restore XCR0 on xsave capable CPUs: 975 */ 976 if (cpu_feature_enabled(X86_FEATURE_XSAVE)) 977 xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features); 978 979 /* 980 * Restore IA32_XSS. The same CPUID bit enumerates support 981 * of XSAVES and MSR_IA32_XSS. 982 */ 983 if (cpu_feature_enabled(X86_FEATURE_XSAVES)) { 984 wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | 985 xfeatures_mask_independent()); 986 } 987 988 if (fpu_state_size_dynamic()) 989 wrmsrq(MSR_IA32_XFD, x86_task_fpu(current)->fpstate->xfd); 990 } 991 992 /* 993 * Given an xstate feature nr, calculate where in the xsave 994 * buffer the state is. Callers should ensure that the buffer 995 * is valid. 996 */ 997 static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) 998 { 999 u64 xcomp_bv = xsave->header.xcomp_bv; 1000 1001 if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) 1002 return NULL; 1003 1004 if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) { 1005 if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr)))) 1006 return NULL; 1007 } 1008 1009 return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr); 1010 } 1011 1012 /* 1013 * Given the xsave area and a state inside, this function returns the 1014 * address of the state. 1015 * 1016 * This is the API that is called to get xstate address in either 1017 * standard format or compacted format of xsave area. 1018 * 1019 * Note that if there is no data for the field in the xsave buffer 1020 * this will return NULL. 1021 * 1022 * Inputs: 1023 * xstate: the thread's storage area for all FPU data 1024 * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, 1025 * XFEATURE_SSE, etc...) 1026 * Output: 1027 * address of the state in the xsave area, or NULL if the 1028 * field is not present in the xsave buffer. 1029 */ 1030 void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) 1031 { 1032 /* 1033 * Do we even *have* xsave state? 1034 */ 1035 if (!boot_cpu_has(X86_FEATURE_XSAVE)) 1036 return NULL; 1037 1038 /* 1039 * We should not ever be requesting features that we 1040 * have not enabled. 1041 */ 1042 if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) 1043 return NULL; 1044 1045 /* 1046 * This assumes the last 'xsave*' instruction to 1047 * have requested that 'xfeature_nr' be saved. 1048 * If it did not, we might be seeing and old value 1049 * of the field in the buffer. 1050 * 1051 * This can happen because the last 'xsave' did not 1052 * request that this feature be saved (unlikely) 1053 * or because the "init optimization" caused it 1054 * to not be saved. 1055 */ 1056 if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr))) 1057 return NULL; 1058 1059 return __raw_xsave_addr(xsave, xfeature_nr); 1060 } 1061 EXPORT_SYMBOL_GPL(get_xsave_addr); 1062 1063 /* 1064 * Given an xstate feature nr, calculate where in the xsave buffer the state is. 1065 * The xsave buffer should be in standard format, not compacted (e.g. user mode 1066 * signal frames). 

/*
 * Given an xstate feature nr, calculate where in the xsave buffer the state is.
 * The xsave buffer should be in standard format, not compacted (e.g. user mode
 * signal frames).
 */
void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_nr)
{
	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
		return NULL;

	return (void __user *)xsave + xstate_offsets[xfeature_nr];
}

#ifdef CONFIG_ARCH_HAS_PKEYS

/*
 * This will go out and modify the PKRU register to set the access
 * rights for @pkey to @init_val.
 */
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
			      unsigned long init_val)
{
	u32 old_pkru, new_pkru_bits = 0;
	int pkey_shift;

	/*
	 * This check implies XSAVE support. OSPKE only gets
	 * set if we enable XSAVE and we enable PKU in XCR0.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
		return -EINVAL;

	/*
	 * This code should only be called with valid 'pkey'
	 * values originating from in-kernel users. Complain
	 * if a bad value is observed.
	 */
	if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
		return -EINVAL;

	/* Set the bits we need in PKRU: */
	if (init_val & PKEY_DISABLE_ACCESS)
		new_pkru_bits |= PKRU_AD_BIT;
	if (init_val & PKEY_DISABLE_WRITE)
		new_pkru_bits |= PKRU_WD_BIT;

	/* Shift the bits in to the correct place in PKRU for pkey: */
	pkey_shift = pkey * PKRU_BITS_PER_PKEY;
	new_pkru_bits <<= pkey_shift;

	/* Get old PKRU and mask off any old bits in place: */
	old_pkru = read_pkru();
	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);

	/* Write old part along with new part: */
	write_pkru(old_pkru | new_pkru_bits);

	return 0;
}
#endif /* CONFIG_ARCH_HAS_PKEYS */
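
/*
 * Worked example for the bit arithmetic above: for pkey = 2 and
 * init_val = PKEY_DISABLE_WRITE, new_pkru_bits is PKRU_WD_BIT and
 * pkey_shift is 2 * PKRU_BITS_PER_PKEY = 4. Bits 4 and 5 (AD/WD for
 * key 2) are cleared in the old PKRU value and bit 5 is set, leaving
 * all other keys untouched.
 */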

static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
			 void *init_xstate, unsigned int size)
{
	membuf_write(to, from_xstate ? xstate : init_xstate, size);
}

/**
 * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @fpstate:	The fpstate buffer from which to copy
 * @xfeatures:	The mask of xfeatures to save (XSAVE mode only)
 * @pkru_val:	The PKRU value to store in the PKRU component
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * It supports partial copy but @to.pos always starts from zero.
 */
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
			       u64 xfeatures, u32 pkru_val,
			       enum xstate_copy_mode copy_mode)
{
	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
	struct xregs_state *xinit = &init_fpstate.regs.xsave;
	struct xregs_state *xsave = &fpstate->regs.xsave;
	unsigned int zerofrom, i, xfeature;
	struct xstate_header header;
	u64 mask;

	memset(&header, 0, sizeof(header));
	header.xfeatures = xsave->header.xfeatures;

	/* Mask out the feature bits depending on copy mode */
	switch (copy_mode) {
	case XSTATE_COPY_FP:
		header.xfeatures &= XFEATURE_MASK_FP;
		break;

	case XSTATE_COPY_FX:
		header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
		break;

	case XSTATE_COPY_XSAVE:
		header.xfeatures &= fpstate->user_xfeatures & xfeatures;
		break;
	}

	/* Copy FP state up to MXCSR */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
		     &xinit->i387, off_mxcsr);

	/* Copy MXCSR when SSE or YMM are set in the feature mask */
	copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
		     &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
		     MXCSR_AND_FLAGS_SIZE);

	/* Copy the remaining FP state */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP,
		     &to, &xsave->i387.st_space, &xinit->i387.st_space,
		     sizeof(xsave->i387.st_space));

	/* Copy the SSE state - shared with YMM, but independently managed */
	copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
		     &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
		     sizeof(xsave->i387.xmm_space));

	if (copy_mode != XSTATE_COPY_XSAVE)
		goto out;

	/* Zero the padding area */
	membuf_zero(&to, sizeof(xsave->i387.padding));

	/* Copy xsave->i387.sw_reserved */
	membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));

	/* Copy the user space relevant state of @xsave->header */
	membuf_write(&to, &header, sizeof(header));

	zerofrom = offsetof(struct xregs_state, extended_state_area);

	/*
	 * This 'mask' indicates which states to copy from fpstate.
	 * Those extended states that are not present in fpstate are
	 * either disabled or initialized:
	 *
	 * In non-compacted format, disabled features still occupy
	 * state space but there is no state to copy from in the
	 * compacted init_fpstate. The gap tracking will zero these
	 * states.
	 *
	 * The extended features have an all zeroes init state. Thus,
	 * remove them from 'mask' to zero those features in the user
	 * buffer instead of retrieving them from init_fpstate.
	 */
	mask = header.xfeatures;

	for_each_extended_xfeature_in_order(i, mask) {
		xfeature = xfeature_uncompact_order[i];
		/*
		 * If there was a feature or alignment gap, zero the space
		 * in the destination buffer.
		 */
		if (zerofrom < xstate_offsets[xfeature])
			membuf_zero(&to, xstate_offsets[xfeature] - zerofrom);

		if (xfeature == XFEATURE_PKRU) {
			struct pkru_state pkru = {0};
			/*
			 * PKRU is not necessarily up to date in the
			 * XSAVE buffer. Use the provided value.
			 */
			pkru.pkru = pkru_val;
			membuf_write(&to, &pkru, sizeof(pkru));
		} else {
			membuf_write(&to,
				     __raw_xsave_addr(xsave, xfeature),
				     xstate_sizes[xfeature]);
		}
		/*
		 * Keep track of the last copied state in the non-compacted
		 * target buffer for gap zeroing.
		 */
		zerofrom = xstate_offsets[xfeature] + xstate_sizes[xfeature];
	}

out:
	if (to.left)
		membuf_zero(&to, to.left);
}
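
/*
 * Gap zeroing example (hypothetical feature masks): if the source
 * buffer contains YMM but none of the AVX-512 components, while the
 * UABI layout reserves space for them, zerofrom is left at the end of
 * the YMM region. When the next present component (say PKRU) is
 * reached, membuf_zero() fills everything between zerofrom and
 * xstate_offsets[XFEATURE_PKRU], i.e. the unused AVX-512 space.
 */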

/**
 * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @tsk:	The task from which to copy the saved xstate
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * It supports partial copy but @to.pos always starts from zero.
 */
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
			     enum xstate_copy_mode copy_mode)
{
	__copy_xstate_to_uabi_buf(to, x86_task_fpu(tsk)->fpstate,
				  x86_task_fpu(tsk)->fpstate->user_xfeatures,
				  tsk->thread.pkru, copy_mode);
}

static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
			    const void *kbuf, const void __user *ubuf)
{
	if (kbuf) {
		memcpy(dst, kbuf + offset, size);
	} else {
		if (copy_from_user(dst, ubuf + offset, size))
			return -EFAULT;
	}
	return 0;
}

/**
 * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
 * @fpstate:	The fpstate buffer to copy to
 * @kbuf:	The UABI format buffer, if it comes from the kernel
 * @ubuf:	The UABI format buffer, if it comes from userspace
 * @pkru:	The location to write the PKRU value to
 *
 * Converts from the UABI format into the kernel internal hardware
 * dependent format.
 *
 * This function ultimately has three different callers with distinct PKRU
 * behavior.
 * 1.	When called from sigreturn the PKRU register will be restored from
 *	@fpstate via an XRSTOR. Correctly copying the UABI format buffer to
 *	@fpstate is sufficient to cover this case, but the caller will also
 *	pass a pointer to the thread_struct's pkru field in @pkru and updating
 *	it is harmless.
 * 2.	When called from ptrace the PKRU register will be restored from the
 *	thread_struct's pkru field. A pointer to that is passed in @pkru.
 *	The kernel will restore it manually, so the XRSTOR behavior that resets
 *	the PKRU register to the hardware init value (0) if the corresponding
 *	xfeatures bit is not set is emulated here.
 * 3.	When called from KVM the PKRU register will be restored from the vcpu's
 *	pkru field. A pointer to that is passed in @pkru. KVM hasn't used
 *	XRSTOR and hasn't had the PKRU resetting behavior described above. To
 *	preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
 *	bit is not set.
 */
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
			       const void __user *ubuf, u32 *pkru)
{
	struct xregs_state *xsave = &fpstate->regs.xsave;
	unsigned int offset, size;
	struct xstate_header hdr;
	u64 mask;
	int i;

	offset = offsetof(struct xregs_state, header);
	if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
		return -EFAULT;

	if (validate_user_xstate_header(&hdr, fpstate))
		return -EINVAL;

	/* Validate MXCSR when any of the related features is in use */
	mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
	if (hdr.xfeatures & mask) {
		u32 mxcsr[2];

		offset = offsetof(struct fxregs_state, mxcsr);
		if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
			return -EFAULT;

		/* Reserved bits in MXCSR must be zero. */
		if (mxcsr[0] & ~mxcsr_feature_mask)
			return -EINVAL;

		/* SSE and YMM require MXCSR even when FP is not in use. */
		if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
			xsave->i387.mxcsr = mxcsr[0];
			xsave->i387.mxcsr_mask = mxcsr[1];
		}
	}

	for (i = 0; i < XFEATURE_MAX; i++) {
		mask = BIT_ULL(i);

		if (hdr.xfeatures & mask) {
			void *dst = __raw_xsave_addr(xsave, i);

			offset = xstate_offsets[i];
			size = xstate_sizes[i];

			if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
				return -EFAULT;
		}
	}

	if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
		struct pkru_state *xpkru;

		xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
		*pkru = xpkru->pkru;
	} else {
		/*
		 * KVM may pass NULL here to indicate that it does not need
		 * PKRU updated.
		 */
		if (pkru)
			*pkru = 0;
	}

	/*
	 * The state that came in from userspace was user-state only.
	 * Mask all the user states out of 'xfeatures':
	 */
	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;

	/*
	 * Add back in the features that came in from userspace:
	 */
	xsave->header.xfeatures |= hdr.xfeatures;

	return 0;
}

/*
 * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
 * format and copy to the target thread. Used by ptrace and KVM.
 */
int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
{
	return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
}

/*
 * Convert from a sigreturn standard-format user-space buffer to kernel
 * XSAVE[S] format and copy to the target thread. This is called from the
 * sigreturn() and rt_sigreturn() system calls.
 */
int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
				      const void __user *ubuf)
{
	return copy_uabi_to_xstate(x86_task_fpu(tsk)->fpstate, NULL, ubuf, &tsk->thread.pkru);
}

static bool validate_independent_components(u64 mask)
{
	u64 xchk;

	if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
		return false;

	xchk = ~xfeatures_mask_independent();

	if (WARN_ON_ONCE(!mask || mask & xchk))
		return false;

	return true;
}

/**
 * xsaves - Save selected components to a kernel xstate buffer
 * @xstate:	Pointer to the buffer
 * @mask:	Feature mask to select the components to save
 *
 * The @xstate buffer must be 64 byte aligned and correctly initialized as
 * XSAVES does not write the full xstate header. Before first use the
 * buffer should be zeroed otherwise a subsequent XRSTORS from that buffer
 * can #GP.
 *
 * The feature mask must be a subset of the independent features.
 */
void xsaves(struct xregs_state *xstate, u64 mask)
{
	int err;

	if (!validate_independent_components(mask))
		return;

	XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
	WARN_ON_ONCE(err);
}

/**
 * xrstors - Restore selected components from a kernel xstate buffer
 * @xstate:	Pointer to the buffer
 * @mask:	Feature mask to select the components to restore
 *
 * The @xstate buffer must be 64 byte aligned and correctly initialized
 * otherwise XRSTORS from that buffer can #GP.
 *
 * Proper usage is to restore the state which was saved with
 * xsaves() into @xstate.
 *
 * The feature mask must be a subset of the independent features.
 */
void xrstors(struct xregs_state *xstate, u64 mask)
{
	int err;

	if (!validate_independent_components(mask))
		return;

	XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
	WARN_ON_ONCE(err);
}
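
/*
 * These two helpers exist for the independent features, e.g. the
 * architectural LBR state, which is XSAVES managed but deliberately not
 * part of the task buffer. A sketch of the expected call pattern:
 *
 *	xsaves(lbr_buf, XFEATURE_MASK_LBR);	save on context switch out
 *	...
 *	xrstors(lbr_buf, XFEATURE_MASK_LBR);	restore on context switch in
 *
 * where lbr_buf is a zeroed, 64-byte aligned buffer owned by the caller
 * (the perf arch LBR code follows this pattern).
 */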

#if IS_ENABLED(CONFIG_KVM)
void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature)
{
	void *addr = get_xsave_addr(&fpstate->regs.xsave, xfeature);

	if (addr)
		memset(addr, 0, xstate_sizes[xfeature]);
}
EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
#endif

#ifdef CONFIG_X86_64

#ifdef CONFIG_X86_DEBUG_FPU
/*
 * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
 * can safely operate on the @fpstate buffer.
 */
static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
{
	u64 xfd = __this_cpu_read(xfd_state);

	if (fpstate->xfd == xfd)
		return true;

	/*
	 * The XFD MSR does not match fpstate->xfd. That's invalid when
	 * the passed in fpstate is current's fpstate.
	 */
	if (fpstate->xfd == x86_task_fpu(current)->fpstate->xfd)
		return false;

	/*
	 * XRSTOR(S) from init_fpstate are always correct as it will just
	 * bring all components into init state and not read from the
	 * buffer. XSAVE(S) raises #PF after init.
	 */
	if (fpstate == &init_fpstate)
		return rstor;

	/*
	 * XSAVE(S): clone(), fpu_swap_kvm_fpstate()
	 * XRSTORS(S): fpu_swap_kvm_fpstate()
	 */

	/*
	 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
	 * the buffer area for XFD-disabled state components.
	 */
	mask &= ~xfd;

	/*
	 * Remove features which are valid in fpstate. They
	 * have space allocated in fpstate.
	 */
	mask &= ~fpstate->xfeatures;

	/*
	 * Any remaining state components in 'mask' might be written
	 * by XSAVE/XRSTOR. Fail validation if any are found.
	 */
	return !mask;
}

void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
{
	WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
}
#endif /* CONFIG_X86_DEBUG_FPU */

static int __init xfd_update_static_branch(void)
{
	/*
	 * If init_fpstate.xfd has bits set then dynamic features are
	 * available and the dynamic sizing must be enabled.
	 */
	if (init_fpstate.xfd)
		static_branch_enable(&__fpu_state_size_dynamic);
	return 0;
}
arch_initcall(xfd_update_static_branch)

void fpstate_free(struct fpu *fpu)
{
	if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
		vfree(fpu->fpstate);
}

/**
 * fpstate_realloc - Reallocate struct fpstate for the requested new features
 *
 * @xfeatures:	A bitmap of xstate features which extend the enabled features
 *		of that task
 * @ksize:	The required size for the kernel buffer
 * @usize:	The required size for user space buffers
 * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
 *
 * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
 * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
 * with large states are likely to live longer.
 *
 * Returns: 0 on success, -ENOMEM on allocation error.
 */
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
			   unsigned int usize, struct fpu_guest *guest_fpu)
{
	struct fpu *fpu = x86_task_fpu(current);
	struct fpstate *curfps, *newfps = NULL;
	unsigned int fpsize;
	bool in_use;

	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);

	newfps = vzalloc(fpsize);
	if (!newfps)
		return -ENOMEM;
	newfps->size = ksize;
	newfps->user_size = usize;
	newfps->is_valloc = true;

	/*
	 * When a guest FPU is supplied, use @guest_fpu->fpstate
	 * as the reference, independent of whether it is in use or not.
	 */
	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;

	/* Determine whether @curfps is the active fpstate */
	in_use = fpu->fpstate == curfps;

	if (guest_fpu) {
		newfps->is_guest = true;
		newfps->is_confidential = curfps->is_confidential;
		newfps->in_use = curfps->in_use;
		guest_fpu->xfeatures |= xfeatures;
		guest_fpu->uabi_size = usize;
	}

	fpregs_lock();
	/*
	 * If @curfps is in use, ensure that the current state is in the
	 * registers before swapping fpstate as that might invalidate it
	 * due to layout changes.
	 */
	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
		fpregs_restore_userregs();

	newfps->xfeatures = curfps->xfeatures | xfeatures;
	newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
	newfps->xfd = curfps->xfd & ~xfeatures;

	/* Do the final updates within the locked region */
	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);

	if (guest_fpu) {
		guest_fpu->fpstate = newfps;
		/* If curfps is active, update the FPU fpstate pointer */
		if (in_use)
			fpu->fpstate = newfps;
	} else {
		fpu->fpstate = newfps;
	}

	if (in_use)
		xfd_update_state(fpu->fpstate);
	fpregs_unlock();

	/* Only free valloc'ed state */
	if (curfps && curfps->is_valloc)
		vfree(curfps);

	return 0;
}

static int validate_sigaltstack(unsigned int usize)
{
	struct task_struct *thread, *leader = current->group_leader;
	unsigned long framesize = get_sigframe_size();

	lockdep_assert_held(&current->sighand->siglock);

	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
	framesize -= fpu_user_cfg.max_size;
	framesize += usize;
	for_each_thread(leader, thread) {
		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
			return -ENOSPC;
	}
	return 0;
}

static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
	/*
	 * This deliberately does not exclude !XSAVES as we still might
	 * decide to optionally context switch XCR0 or talk the silicon
	 * vendors into extending XFD for the pre AMX states, especially
	 * AVX512.
	 */
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	struct fpu *fpu = x86_task_fpu(current->group_leader);
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	u64 mask;
	int ret = 0;

	/* Check whether fully enabled */
	if ((permitted & requested) == requested)
		return 0;

	/*
	 * Calculate the resulting kernel state size. Note, @permitted also
	 * contains supervisor xfeatures even though supervisor xfeatures
	 * are always permitted for kernel and guest FPUs, and never
	 * permitted for user FPUs.
	 */
	mask = permitted | requested;
	ksize = xstate_calculate_size(mask, compacted);

	/*
	 * Calculate the resulting user state size. Take care not to clobber
	 * the supervisor xfeatures in the new mask!
	 */
	usize = xstate_calculate_size(mask & XFEATURE_MASK_USER_SUPPORTED, false);

	if (!guest) {
		ret = validate_sigaltstack(usize);
		if (ret)
			return ret;
	}

	perm = guest ? &fpu->guest_perm : &fpu->perm;
	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
	WRITE_ONCE(perm->__state_perm, mask);
	/* Protected by sighand lock */
	perm->__state_size = ksize;
	perm->__user_state_size = usize;
	return ret;
}
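
/*
 * Size arithmetic example (illustrative numbers): a request for
 * XFEATURE_MASK_XTILE grows the user state size by roughly 8K of
 * XTILE_DATA plus 64 bytes of XTILE_CFG. validate_sigaltstack() then
 * recomputes the signal frame size with the grown usize and refuses the
 * permission if any thread has registered an alternate signal stack
 * that would be too small for it.
 */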

/*
 * Permissions array to map facilities with more than one component
 */
static const u64 xstate_prctl_req[XFEATURE_MAX] = {
	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};

static int xstate_request_perm(unsigned long idx, bool guest)
{
	u64 permitted, requested;
	int ret;

	if (idx >= XFEATURE_MAX)
		return -EINVAL;

	/*
	 * Look up the facility mask which can require more than
	 * one xstate component.
	 */
	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
	requested = xstate_prctl_req[idx];
	if (!requested)
		return -EOPNOTSUPP;

	if ((fpu_user_cfg.max_features & requested) != requested)
		return -EOPNOTSUPP;

	/* Lockless quick check */
	permitted = xstate_get_group_perm(guest);
	if ((permitted & requested) == requested)
		return 0;

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);
	permitted = xstate_get_group_perm(guest);

	/* First vCPU allocation locks the permissions. */
	if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
		ret = -EBUSY;
	else
		ret = __xstate_request_perm(permitted, requested, guest);
	spin_unlock_irq(&current->sighand->siglock);
	return ret;
}

int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	struct fpu *fpu;

	if (!xfd_event) {
		if (!guest_fpu)
			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
		return 0;
	}

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);

	/* If not permitted let it die */
	if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
		spin_unlock_irq(&current->sighand->siglock);
		return -EPERM;
	}

	fpu = x86_task_fpu(current->group_leader);
	perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
	ksize = perm->__state_size;
	usize = perm->__user_state_size;

	/*
	 * The feature is permitted. State size is sufficient. Dropping
	 * the lock is safe here even if more features are added from
	 * another task, the retrieved buffer sizes are valid for the
	 * currently requested feature(s).
	 */
	spin_unlock_irq(&current->sighand->siglock);

	/*
	 * Try to allocate a new fpstate. If that fails there is no way
	 * out.
	 */
	if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
		return -EFAULT;
	return 0;
}

int xfd_enable_feature(u64 xfd_err)
{
	return __xfd_enable_feature(xfd_err, NULL);
}

#else /* CONFIG_X86_64 */
static inline int xstate_request_perm(unsigned long idx, bool guest)
{
	return -EPERM;
}
#endif /* !CONFIG_X86_64 */

u64 xstate_get_guest_group_perm(void)
{
	return xstate_get_group_perm(true);
}
EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);

/**
 * fpu_xstate_prctl - xstate permission operations
 * @option:	A subfunction of arch_prctl()
 * @arg2:	option argument
 * Return:	0 if successful; otherwise, an error code
 *
 * Option arguments:
 *
 * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
 * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
 * ARCH_REQ_XCOMP_PERM: Facility number requested
 *
 * For facilities which require more than one XSTATE component, the request
 * must be the highest state component number related to that facility,
 * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
 * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
 */
long fpu_xstate_prctl(int option, unsigned long arg2)
{
	u64 __user *uptr = (u64 __user *)arg2;
	u64 permitted, supported;
	unsigned long idx = arg2;
	bool guest = false;

	switch (option) {
	case ARCH_GET_XCOMP_SUPP:
		supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
		return put_user(supported, uptr);

	case ARCH_GET_XCOMP_PERM:
		/*
		 * Lockless snapshot as it can also change right after
		 * dropping the lock.
		 */
		permitted = xstate_get_host_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_GET_XCOMP_GUEST_PERM:
		permitted = xstate_get_guest_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_REQ_XCOMP_GUEST_PERM:
		guest = true;
		fallthrough;

	case ARCH_REQ_XCOMP_PERM:
		if (!IS_ENABLED(CONFIG_X86_64))
			return -EOPNOTSUPP;

		return xstate_request_perm(idx, guest);

	default:
		return -EINVAL;
	}
}
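
/*
 * Userspace usage sketch (see the x86 xstate documentation): before
 * touching AMX tile data a task is expected to request permission once
 * per process:
 *
 *	arch_prctl(ARCH_REQ_XCOMP_PERM, XFEATURE_XTILE_DATA);
 *
 * and can query the result via ARCH_GET_XCOMP_PERM, which stores the
 * permitted feature bitmap in the supplied u64 pointer.
 */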

#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
 * Report the amount of time elapsed in milliseconds since last AVX-512
 * use in the task. Report -1 if no AVX-512 usage.
 */
static void avx512_status(struct seq_file *m, struct task_struct *task)
{
	unsigned long timestamp;
	long delta = -1;

	/* AVX-512 usage is not tracked for kernel threads. Don't report anything. */
	if (task->flags & (PF_KTHREAD | PF_USER_WORKER))
		return;

	timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp);

	if (timestamp) {
		delta = (long)(jiffies - timestamp);
		/*
		 * Cap to LONG_MAX if time difference > LONG_MAX
		 */
		if (delta < 0)
			delta = LONG_MAX;
		delta = jiffies_to_msecs(delta);
	}

	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
	seq_putc(m, '\n');
}

/*
 * Report architecture specific information
 */
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
			 struct pid *pid, struct task_struct *task)
{
	/*
	 * Report AVX-512 state if the processor and the kernel build
	 * support it.
	 */
	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
		avx512_status(m, task);

	return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */

#ifdef CONFIG_COREDUMP
static const char owner_name[] = "LINUX";

/*
 * Dump type, size, offset and flag values for every xfeature that is present.
 */
static int dump_xsave_layout_desc(struct coredump_params *cprm)
{
	int num_records = 0;
	int i;

	for_each_extended_xfeature(i, fpu_user_cfg.max_features) {
		struct x86_xfeat_component xc = {
			.type = i,
			.size = xstate_sizes[i],
			.offset = xstate_offsets[i],
			/* reserved for future use */
			.flags = 0,
		};

		if (!dump_emit(cprm, &xc, sizeof(xc)))
			return 0;

		num_records++;
	}
	return num_records;
}

static u32 get_xsave_desc_size(void)
{
	u32 cnt = 0;
	u32 i;

	for_each_extended_xfeature(i, fpu_user_cfg.max_features)
		cnt++;

	return cnt * sizeof(struct x86_xfeat_component);
}

int elf_coredump_extra_notes_write(struct coredump_params *cprm)
{
	int num_records = 0;
	struct elf_note en;

	if (!fpu_user_cfg.max_features)
		return 0;

	en.n_namesz = sizeof(owner_name);
	en.n_descsz = get_xsave_desc_size();
	en.n_type = NT_X86_XSAVE_LAYOUT;

	if (!dump_emit(cprm, &en, sizeof(en)))
		return 1;
	if (!dump_emit(cprm, owner_name, en.n_namesz))
		return 1;
	if (!dump_align(cprm, 4))
		return 1;

	num_records = dump_xsave_layout_desc(cprm);
	if (!num_records)
		return 1;

	/* Total size must match the number of records times the record size */
	if ((sizeof(struct x86_xfeat_component) * num_records) != en.n_descsz)
		return 1;

	return 0;
}

int elf_coredump_extra_notes_size(void)
{
	int size;

	if (!fpu_user_cfg.max_features)
		return 0;

	/* .note header */
	size  = sizeof(struct elf_note);
	/* Name plus alignment to 4 bytes */
	size += roundup(sizeof(owner_name), 4);
	size += get_xsave_desc_size();

	return size;
}
#endif /* CONFIG_COREDUMP */