// SPDX-License-Identifier: GPL-2.0-only
/*
 * xsave/xrstor support.
 *
 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
 */
#include <linux/bitops.h>
#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/nospec.h>
#include <linux/pkeys.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/coredump.h>
#include <linux/sort.h>

#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/xcr.h>

#include <asm/cpuid/api.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/prctl.h>
#include <asm/elf.h>

#include <uapi/asm/elf.h>

#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"

#define for_each_extended_xfeature(bit, mask)				\
	(bit) = FIRST_EXTENDED_XFEATURE;				\
	for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))

/*
 * Although we spell it out in here, the Processor Trace
 * xfeature is completely unused.  We use other mechanisms
 * to save/restore PT state in Linux.
 */
static const char *xfeature_names[] =
{
	"x87 floating point registers",
	"SSE registers",
	"AVX registers",
	"MPX bounds registers",
	"MPX CSR",
	"AVX-512 opmask",
	"AVX-512 Hi256",
	"AVX-512 ZMM_Hi256",
	"Processor Trace (unused)",
	"Protection Keys User registers",
	"PASID state",
	"Control-flow User registers",
	"Control-flow Kernel registers (unused)",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"AMX Tile config",
	"AMX Tile data",
	"APX registers",
	"unknown xstate feature",
};

static unsigned short xsave_cpuid_features[] __initdata = {
	[XFEATURE_FP]				= X86_FEATURE_FPU,
	[XFEATURE_SSE]				= X86_FEATURE_XMM,
	[XFEATURE_YMM]				= X86_FEATURE_AVX,
	[XFEATURE_BNDREGS]			= X86_FEATURE_MPX,
	[XFEATURE_BNDCSR]			= X86_FEATURE_MPX,
	[XFEATURE_OPMASK]			= X86_FEATURE_AVX512F,
	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
	[XFEATURE_PKRU]				= X86_FEATURE_OSPKE,
	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
	[XFEATURE_CET_USER]			= X86_FEATURE_SHSTK,
	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
	[XFEATURE_APX]				= X86_FEATURE_APX,
};

static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;

/*
 * Ordering of xstate components in uncompacted format: The xfeature
 * number does not necessarily indicate its position in the XSAVE buffer.
 * This array defines the traversal order of xstate features.
 */
static unsigned int xfeature_uncompact_order[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
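
/*
 * Illustrative (purely hypothetical) example of why a traversal order is
 * needed: if a CPU enumerated xfeature 17 at a lower uncompacted offset
 * than xfeature 15, setup_xstate_cache() below would sort this array so
 * that xfeature_uncompact_order[] visits 17 before 15, i.e. strictly by
 * increasing offset rather than by feature number.
 */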
static inline unsigned int next_xfeature_order(unsigned int i, u64 mask)
{
	for (; xfeature_uncompact_order[i] != -1; i++) {
		if (mask & BIT_ULL(xfeature_uncompact_order[i]))
			break;
	}

	return i;
}

/* Iterate xstate features in uncompacted order: */
#define for_each_extended_xfeature_in_order(i, mask)	\
	for (i = 0;					\
	     i = next_xfeature_order(i, mask),		\
	     xfeature_uncompact_order[i] != -1;		\
	     i++)

#define XSTATE_FLAG_SUPERVISOR	BIT(0)
#define XSTATE_FLAG_ALIGNED64	BIT(1)

/*
 * Return whether the system supports a given xfeature.
 *
 * Also return the name of the (most advanced) feature that the caller requested:
 */
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
	u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;

	if (unlikely(feature_name)) {
		long xfeature_idx, max_idx;
		u64 xfeatures_print;
		/*
		 * So we use FLS here to be able to print the most advanced
		 * feature that was requested but is missing. So if a driver
		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
		 * missing AVX feature - this is the most informative message
		 * to users:
		 */
		if (xfeatures_missing)
			xfeatures_print = xfeatures_missing;
		else
			xfeatures_print = xfeatures_needed;

		xfeature_idx = fls64(xfeatures_print)-1;
		max_idx = ARRAY_SIZE(xfeature_names)-1;
		xfeature_idx = min(xfeature_idx, max_idx);

		*feature_name = xfeature_names[xfeature_idx];
	}

	if (xfeatures_missing)
		return 0;

	return 1;
}
EXPORT_SYMBOL_GPL(cpu_has_xfeatures);

static bool xfeature_is_aligned64(int xfeature_nr)
{
	return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
}

static bool xfeature_is_supervisor(int xfeature_nr)
{
	return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
}

static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
{
	unsigned int offs, i;

	/*
	 * Non-compacted format and legacy features use the cached fixed
	 * offsets.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
	    xfeature <= XFEATURE_SSE)
		return xstate_offsets[xfeature];

	/*
	 * Compacted format offsets depend on the actual content of the
	 * compacted xsave area which is determined by the xcomp_bv header
	 * field.
	 */
	offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	for_each_extended_xfeature(i, xcomp_bv) {
		if (xfeature_is_aligned64(i))
			offs = ALIGN(offs, 64);
		if (i == xfeature)
			break;
		offs += xstate_sizes[i];
	}
	return offs;
}
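
/*
 * Worked example for the compacted-offset walk above (hypothetical
 * xcomp_bv, feature sizes left symbolic): the walk starts at
 * FXSAVE_SIZE + XSAVE_HDR_SIZE = 512 + 64 = 576 bytes.  If xcomp_bv
 * enables only YMM and ZMM_Hi256 among the extended features, and
 * ZMM_Hi256 carries the 64-byte alignment flag, then
 *
 *	offset(YMM)       = 576
 *	offset(ZMM_Hi256) = ALIGN(576 + xstate_sizes[XFEATURE_YMM], 64)
 *
 * i.e. every enabled lower-numbered component shifts the next one up,
 * which is why compacted offsets cannot be cached.
 */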
/*
 * Enable the extended processor state save/restore feature.
 * Called once per CPU onlining.
 */
void fpu__init_cpu_xstate(void)
{
	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
		return;

	cr4_set_bits(X86_CR4_OSXSAVE);

	/*
	 * Must happen after CR4 setup and before xsetbv() to allow KVM
	 * lazy passthrough.  Write independent of the dynamic state static
	 * key as that does not work on the boot CPU. This also ensures
	 * that any stale state is wiped out from XFD. Reset the per CPU
	 * xfd cache too.
	 */
	if (cpu_feature_enabled(X86_FEATURE_XFD))
		xfd_set_state(init_fpstate.xfd);

	/*
	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
	 * managed by XSAVE{C, OPT, S} and XRSTOR{S}.  Only XSAVE user
	 * states can be set here.
	 */
	xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);

	/*
	 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
	 */
	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
		wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() |
				     xfeatures_mask_independent());
	}
}

static bool xfeature_enabled(enum xfeature xfeature)
{
	return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
}

static int compare_xstate_offsets(const void *xfeature1, const void *xfeature2)
{
	return xstate_offsets[*(unsigned int *)xfeature1] -
	       xstate_offsets[*(unsigned int *)xfeature2];
}

/*
 * Record the offsets and sizes of various xstates contained
 * in the XSAVE state memory layout. Also, create an ordered
 * list of xfeatures for handling out-of-order offsets.
 */
static void __init setup_xstate_cache(void)
{
	u32 eax, ebx, ecx, edx, xfeature, i = 0;
	/*
	 * The FP xstates and SSE xstates are legacy states. They are always
	 * in the fixed offsets in the xsave area in either compacted form
	 * or standard form.
	 */
	xstate_offsets[XFEATURE_FP]	= 0;
	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
						   xmm_space);

	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
						       xmm_space);

	for_each_extended_xfeature(xfeature, fpu_kernel_cfg.max_features) {
		cpuid_count(CPUID_LEAF_XSTATE, xfeature, &eax, &ebx, &ecx, &edx);

		xstate_sizes[xfeature] = eax;
		xstate_flags[xfeature] = ecx;

		/*
		 * If an xfeature is supervisor state, the offset in EBX is
		 * invalid, leave it as -1.
		 */
		if (xfeature_is_supervisor(xfeature))
			continue;

		xstate_offsets[xfeature] = ebx;

		/* Populate the list of xfeatures before sorting */
		xfeature_uncompact_order[i++] = xfeature;
	}

	/*
	 * Sort xfeatures by their offsets to support out-of-order
	 * offsets in the uncompacted format.
	 */
	sort(xfeature_uncompact_order, i, sizeof(unsigned int), compare_xstate_offsets, NULL);
}

/*
 * Print out all the supported xstate features:
 */
static void __init print_xstate_features(void)
{
	int i;

	for (i = 0; i < XFEATURE_MAX; i++) {
		u64 mask = BIT_ULL(i);
		const char *name;

		if (cpu_has_xfeatures(mask, &name))
			pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", mask, name);
	}
}

/*
 * This check is important because it is easy to get XSTATE_*
 * confused with XSTATE_BIT_*.
 */
#define CHECK_XFEATURE(nr) do {			\
	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
	WARN_ON(nr >= XFEATURE_MAX);		\
} while (0)

/*
 * Print out xstate component offsets and sizes
 */
static void __init print_xstate_offset_size(void)
{
	int i;

	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
			i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
			i, xstate_sizes[i]);
	}
}

/*
 * This function is called only during boot time when x86 caps are not set
 * up and alternatives cannot be used yet.
 */
static __init void os_xrstor_booting(struct xregs_state *xstate)
{
	u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
	u32 lmask = mask;
	u32 hmask = mask >> 32;
	int err;

	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
	else
		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);

	/*
	 * We should never fault when copying from a kernel buffer, and the FPU
	 * state we set at boot time should be valid.
	 */
	WARN_ON_FPU(err);
}

/*
 * All supported features have either init state all zeros or are
 * handled in setup_init_fpu_buf() individually. This is an explicit
 * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
 * newly added supported features at build time and make people
 * actually look at the init state for the new feature.
 */
#define XFEATURES_INIT_FPSTATE_HANDLED		\
	(XFEATURE_MASK_FP |			\
	 XFEATURE_MASK_SSE |			\
	 XFEATURE_MASK_YMM |			\
	 XFEATURE_MASK_OPMASK |			\
	 XFEATURE_MASK_ZMM_Hi256 |		\
	 XFEATURE_MASK_Hi16_ZMM	 |		\
	 XFEATURE_MASK_PKRU |			\
	 XFEATURE_MASK_BNDREGS |		\
	 XFEATURE_MASK_BNDCSR |			\
	 XFEATURE_MASK_PASID |			\
	 XFEATURE_MASK_CET_USER |		\
	 XFEATURE_MASK_XTILE |			\
	 XFEATURE_MASK_APX)

/*
 * setup the xstate image representing the init state
 */
static void __init setup_init_fpu_buf(void)
{
	BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
		      XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
		     XFEATURES_INIT_FPSTATE_HANDLED);

	if (!boot_cpu_has(X86_FEATURE_XSAVE))
		return;

	print_xstate_features();

	xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);

	/*
	 * Init all the features state with header.xfeatures being 0x0
	 */
	os_xrstor_booting(&init_fpstate.regs.xsave);

	/*
	 * All components are now in init state. Read the state back so
	 * that init_fpstate contains all non-zero init state. This only
	 * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
	 * those use the init optimization which skips writing data for
	 * components in init state.
	 *
	 * XSAVE could be used, but that would require to reshuffle the
	 * data when XSAVEC/S is available because XSAVEC/S uses xstate
	 * compaction. But doing so is a pointless exercise because most
	 * components have an all zeros init state except for the legacy
	 * ones (FP and SSE). Those can be saved with FXSAVE into the
	 * legacy area. Adding new features requires to ensure that init
	 * state is all zeroes or if not to add the necessary handling
	 * here.
	 */
	fxsave(&init_fpstate.regs.fxsave);
}

int xfeature_size(int xfeature_nr)
{
	u32 eax, ebx, ecx, edx;

	CHECK_XFEATURE(xfeature_nr);
	cpuid_count(CPUID_LEAF_XSTATE, xfeature_nr, &eax, &ebx, &ecx, &edx);
	return eax;
}

/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
static int validate_user_xstate_header(const struct xstate_header *hdr,
				       struct fpstate *fpstate)
{
	/* No unknown or supervisor features may be set */
	if (hdr->xfeatures & ~fpstate->user_xfeatures)
		return -EINVAL;

	/* Userspace must use the uncompacted format */
	if (hdr->xcomp_bv)
		return -EINVAL;

	/*
	 * If 'reserved' is shrunken to add a new field, make sure to validate
	 * that new field here!
	 */
	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);

	/* No reserved bits may be set */
	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
		return -EINVAL;

	return 0;
}
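
/*
 * Example of a header that passes the validation above (sketch only; the
 * acceptable feature bits depend on fpstate->user_xfeatures at run time):
 *
 *	hdr.xfeatures = XFEATURE_MASK_FP | XFEATURE_MASK_SSE;	// subset of user features
 *	hdr.xcomp_bv  = 0;					// uncompacted format only
 *	memset(hdr.reserved, 0, sizeof(hdr.reserved));		// no reserved bits set
 */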
static void __init __xstate_dump_leaves(void)
{
	int i;
	u32 eax, ebx, ecx, edx;
	static int should_dump = 1;

	if (!should_dump)
		return;
	should_dump = 0;
	/*
	 * Dump out a few leaves past the ones that we support
	 * just in case there are some goodies up there
	 */
	for (i = 0; i < XFEATURE_MAX + 10; i++) {
		cpuid_count(CPUID_LEAF_XSTATE, i, &eax, &ebx, &ecx, &edx);
		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
			CPUID_LEAF_XSTATE, i, eax, ebx, ecx, edx);
	}
}

#define XSTATE_WARN_ON(x, fmt, ...) do {					\
	if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) {	\
		__xstate_dump_leaves();						\
	}									\
} while (0)

#define XCHECK_SZ(sz, nr, __struct) ({					\
	if (WARN_ONCE(sz != sizeof(__struct),				\
	    "[%s]: struct is %zu bytes, cpu state %d bytes\n",		\
	    xfeature_names[nr], sizeof(__struct), sz)) {		\
		__xstate_dump_leaves();					\
	}								\
	true;								\
})


/**
 * check_xtile_data_against_struct - Check tile data state size.
 *
 * Calculate the state size by multiplying the single tile size which is
 * recorded in a C struct, and the number of tiles that the CPU enumerates.
 * Compare the provided size with the calculation.
 *
 * @size: The tile data state size
 *
 * Returns: 0 on success, -EINVAL on mismatch.
 */
static int __init check_xtile_data_against_struct(int size)
{
	u32 max_palid, palid, state_size;
	u32 eax, ebx, ecx, edx;
	u16 max_tile;

	/*
	 * Check the maximum palette id:
	 *   eax: the highest numbered palette subleaf.
	 */
	cpuid_count(CPUID_LEAF_TILE, 0, &max_palid, &ebx, &ecx, &edx);

	/*
	 * Cross-check each tile size and find the maximum number of
	 * supported tiles.
	 */
	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
		u16 tile_size, max;

		/*
		 * Check the tile size info:
		 *   eax[31:16]:  bytes per tile
		 *   ebx[31:16]:  the max names (or max number of tiles)
		 */
		cpuid_count(CPUID_LEAF_TILE, palid, &eax, &ebx, &edx, &edx);
		tile_size = eax >> 16;
		max = ebx >> 16;

		if (tile_size != sizeof(struct xtile_data)) {
			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
			       __stringify(XFEATURE_XTILE_DATA),
			       sizeof(struct xtile_data), tile_size);
			__xstate_dump_leaves();
			return -EINVAL;
		}

		if (max > max_tile)
			max_tile = max;
	}

	state_size = sizeof(struct xtile_data) * max_tile;
	if (size != state_size) {
		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
		__xstate_dump_leaves();
		return -EINVAL;
	}
	return 0;
}
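
/*
 * Concrete illustration of the check above (numbers as enumerated by the
 * initial AMX implementations; other palettes may differ): with 8 tile
 * registers of 1024 bytes each, the expected XTILE_DATA component size is
 *
 *	sizeof(struct xtile_data) * max_tile = 1024 * 8 = 8192 bytes
 *
 * which must match what CPUID leaf 0xd reports for XFEATURE_XTILE_DATA.
 */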
/*
 * We have a C struct for each 'xstate'.  We need to ensure
 * that our software representation matches what the CPU
 * tells us about the state's size.
 */
static bool __init check_xstate_against_struct(int nr)
{
	/*
	 * Ask the CPU for the size of the state.
	 */
	int sz = xfeature_size(nr);

	/*
	 * Match each CPU state with the corresponding software
	 * structure.
	 */
	switch (nr) {
	case XFEATURE_YMM:	  return XCHECK_SZ(sz, nr, struct ymmh_struct);
	case XFEATURE_BNDREGS:	  return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
	case XFEATURE_BNDCSR:	  return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
	case XFEATURE_OPMASK:	  return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
	case XFEATURE_ZMM_Hi256:  return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
	case XFEATURE_Hi16_ZMM:	  return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
	case XFEATURE_PKRU:	  return XCHECK_SZ(sz, nr, struct pkru_state);
	case XFEATURE_PASID:	  return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
	case XFEATURE_XTILE_CFG:  return XCHECK_SZ(sz, nr, struct xtile_cfg);
	case XFEATURE_CET_USER:	  return XCHECK_SZ(sz, nr, struct cet_user_state);
	case XFEATURE_APX:	  return XCHECK_SZ(sz, nr, struct apx_state);
	case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
	default:
		XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
		return false;
	}

	return true;
}

static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
{
	unsigned int topmost = fls64(xfeatures) - 1;
	unsigned int offset, i;

	if (topmost <= XFEATURE_SSE)
		return sizeof(struct xregs_state);

	if (compacted) {
		offset = xfeature_get_offset(xfeatures, topmost);
	} else {
		/* Walk through the xfeature order to pick the last */
		for_each_extended_xfeature_in_order(i, xfeatures)
			topmost = xfeature_uncompact_order[i];
		offset = xstate_offsets[topmost];
	}

	return offset + xstate_sizes[topmost];
}
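
/*
 * Worked example for xstate_calculate_size() (the well-known layout for
 * XCR0 = x87 | SSE | AVX, offered as an illustration only): in standard
 * format the last component is YMM at offset 576 with size 256, so the
 * buffer size is 576 + 256 = 832 bytes.  In compacted format the offset
 * of the topmost enabled feature is computed via xfeature_get_offset()
 * against the same mask instead of being read from xstate_offsets[].
 */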
/*
 * This essentially double-checks what the cpu told us about
 * how large the XSAVE buffer needs to be.  We are recalculating
 * it to be safe.
 *
 * Independent XSAVE features allocate their own buffers and are not
 * covered by these checks. Only the size of the buffer for task->fpu
 * is checked here.
 */
static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
{
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	int i;

	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
		if (!check_xstate_against_struct(i))
			return false;
		/*
		 * Supervisor state components can be managed only by
		 * XSAVES.
		 */
		if (!xsaves && xfeature_is_supervisor(i)) {
			XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
			return false;
		}
	}
	size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
	XSTATE_WARN_ON(size != kernel_size,
		       "size %u != kernel_size %u\n", size, kernel_size);
	return size == kernel_size;
}

/*
 * Get total size of enabled xstates in XCR0 | IA32_XSS.
 *
 * Note the SDM's wording here.  "sub-function 0" only enumerates
 * the size of the *user* states.  If we use it to size a buffer
 * that we use 'XSAVES' on, we could potentially overflow the
 * buffer because 'XSAVES' saves system states too.
 *
 * This also takes compaction into account. So this works for
 * XSAVEC as well.
 */
static unsigned int __init get_compacted_size(void)
{
	unsigned int eax, ebx, ecx, edx;
	/*
	 * - CPUID function 0DH, sub-function 1:
	 *    EBX enumerates the size (in bytes) required by
	 *    the XSAVES instruction for an XSAVE area
	 *    containing all the state components
	 *    corresponding to bits currently set in
	 *    XCR0 | IA32_XSS.
	 *
	 * When XSAVES is not available but XSAVEC is (virt), then there
	 * are no supervisor states, but XSAVEC still uses compacted
	 * format.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
	return ebx;
}

/*
 * Get the total size of the enabled xstates without the independent supervisor
 * features.
 */
static unsigned int __init get_xsave_compacted_size(void)
{
	u64 mask = xfeatures_mask_independent();
	unsigned int size;

	if (!mask)
		return get_compacted_size();

	/* Disable independent features. */
	wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor());

	/*
	 * Ask the hardware what size is required of the buffer.
	 * This is the size required for the task->fpu buffer.
	 */
	size = get_compacted_size();

	/* Re-enable independent features so XSAVES will work on them again. */
	wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);

	return size;
}

static unsigned int __init get_xsave_size_user(void)
{
	unsigned int eax, ebx, ecx, edx;
	/*
	 * - CPUID function 0DH, sub-function 0:
	 *    EBX enumerates the size (in bytes) required by
	 *    the XSAVE instruction for an XSAVE area
	 *    containing all the *user* state components
	 *    corresponding to bits currently set in XCR0.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
	return ebx;
}

static int __init init_xstate_size(void)
{
	/* Recompute the context size for enabled features: */
	unsigned int user_size, kernel_size, kernel_default_size;
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);

	/* Uncompacted user space size */
	user_size = get_xsave_size_user();

	/*
	 * XSAVES kernel size includes supervisor states and uses compacted
	 * format. XSAVEC uses compacted format, but does not save
	 * supervisor states.
	 *
	 * XSAVE[OPT] do not support supervisor states so kernel and user
	 * size is identical.
	 */
	if (compacted)
		kernel_size = get_xsave_compacted_size();
	else
		kernel_size = user_size;

	kernel_default_size =
		xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);

	if (!paranoid_xstate_size_valid(kernel_size))
		return -EINVAL;

	fpu_kernel_cfg.max_size = kernel_size;
	fpu_user_cfg.max_size = user_size;

	fpu_kernel_cfg.default_size = kernel_default_size;
	fpu_user_cfg.default_size =
		xstate_calculate_size(fpu_user_cfg.default_features, false);

	return 0;
}
/*
 * We enabled the XSAVE hardware, but something went wrong and
 * we can not use it.  Disable it.
 */
static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
{
	pr_info("x86/fpu: XSAVE disabled\n");

	fpu_kernel_cfg.max_features = 0;
	cr4_clear_bits(X86_CR4_OSXSAVE);
	setup_clear_cpu_cap(X86_FEATURE_XSAVE);

	/* Restore the legacy size. */
	fpu_kernel_cfg.max_size = legacy_size;
	fpu_kernel_cfg.default_size = legacy_size;
	fpu_user_cfg.max_size = legacy_size;
	fpu_user_cfg.default_size = legacy_size;

	/*
	 * Prevent enabling the static branch which enables writes to the
	 * XFD MSR.
	 */
	init_fpstate.xfd = 0;

	fpstate_reset(x86_task_fpu(current));
}

/*
 * Enable and initialize the xsave feature.
 * Called once per system bootup.
 */
void __init fpu__init_system_xstate(unsigned int legacy_size)
{
	unsigned int eax, ebx, ecx, edx;
	u64 xfeatures;
	int err;
	int i;

	if (!boot_cpu_has(X86_FEATURE_FPU)) {
		pr_info("x86/fpu: No FPU detected\n");
		return;
	}

	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
		pr_info("x86/fpu: x87 FPU will use %s\n",
			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
		return;
	}

	/*
	 * Find user xstates supported by the processor.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);

	/*
	 * Find supervisor xstates supported by the processor.
	 */
	cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);

	if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		/*
		 * This indicates that something really unexpected happened
		 * with the enumeration.  Disable XSAVE and try to continue
		 * booting without it.  This is too early to BUG().
		 */
		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	if (fpu_kernel_cfg.max_features & XFEATURE_MASK_APX &&
	    fpu_kernel_cfg.max_features & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) {
		/*
		 * This is a problematic CPU configuration where two
		 * conflicting state components are both enumerated.
		 */
		pr_err("x86/fpu: Both APX/MPX present in the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
					      XFEATURE_MASK_INDEPENDENT;

	/*
	 * Clear XSAVE features that are disabled in the normal CPUID.
	 */
	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
		unsigned short cid = xsave_cpuid_features[i];

		/* Careful: X86_FEATURE_FPU is 0! */
		if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
			fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
	}
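	/*
	 * Example of the effect of the loop above (illustrative): if the
	 * CPU enumerates XFEATURE_PKRU in CPUID leaf 0xd but the kernel
	 * did not enable X86_FEATURE_OSPKE, the PKRU bit is stripped from
	 * max_features here so that no PKRU state is ever allocated or
	 * context switched.
	 */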
	if (!cpu_feature_enabled(X86_FEATURE_XFD))
		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
	else
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
					       XFEATURE_MASK_SUPERVISOR_SUPPORTED;

	fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
	fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;

	/* Clean out dynamic features from default */
	fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
	fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	fpu_user_cfg.default_features = fpu_user_cfg.max_features;
	fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	/* Store it for paranoia check at the end */
	xfeatures = fpu_kernel_cfg.max_features;

	/*
	 * Initialize the default XFD state in init_fpstate and enable the
	 * dynamic sizing mechanism if dynamic states are available.  The
	 * static key cannot be enabled here because this runs before
	 * jump_label_init(). This is delayed to an initcall.
	 */
	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;

	/* Set up compaction feature bit */
	if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
	    cpu_feature_enabled(X86_FEATURE_XSAVES))
		setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);

	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();

	/* Cache size, offset and flags for initialization */
	setup_xstate_cache();

	err = init_xstate_size();
	if (err)
		goto out_disable;

	/*
	 * Update info used for ptrace frames; use standard-format size and no
	 * supervisor xstates:
	 */
	update_regset_xstate_info(fpu_user_cfg.max_size,
				  fpu_user_cfg.max_features);

	/*
	 * init_fpstate excludes dynamic states as they are large but init
	 * state is zero.
	 */
	init_fpstate.size = fpu_kernel_cfg.default_size;
	init_fpstate.xfeatures = fpu_kernel_cfg.default_features;

	if (init_fpstate.size > sizeof(init_fpstate.regs)) {
		pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d)\n",
			sizeof(init_fpstate.regs), init_fpstate.size);
		goto out_disable;
	}

	setup_init_fpu_buf();

	/*
	 * Paranoia check whether something in the setup modified the
	 * xfeatures mask.
	 */
	if (xfeatures != fpu_kernel_cfg.max_features) {
		pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init\n",
		       xfeatures, fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/*
	 * CPU capabilities initialization runs before FPU init. So
	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
	 * functional, set the feature bit so depending code works.
	 */
	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);

	print_xstate_offset_size();
	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
		fpu_kernel_cfg.max_features,
		fpu_kernel_cfg.max_size,
		boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
	return;

out_disable:
	/* something went wrong, try to boot without any XSAVE support */
	fpu__init_disable_system_xstate(legacy_size);
}
"compacted" : "standard"); 935 return; 936 937 out_disable: 938 /* something went wrong, try to boot without any XSAVE support */ 939 fpu__init_disable_system_xstate(legacy_size); 940 } 941 942 /* 943 * Restore minimal FPU state after suspend: 944 */ 945 void fpu__resume_cpu(void) 946 { 947 /* 948 * Restore XCR0 on xsave capable CPUs: 949 */ 950 if (cpu_feature_enabled(X86_FEATURE_XSAVE)) 951 xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features); 952 953 /* 954 * Restore IA32_XSS. The same CPUID bit enumerates support 955 * of XSAVES and MSR_IA32_XSS. 956 */ 957 if (cpu_feature_enabled(X86_FEATURE_XSAVES)) { 958 wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | 959 xfeatures_mask_independent()); 960 } 961 962 if (fpu_state_size_dynamic()) 963 wrmsrq(MSR_IA32_XFD, x86_task_fpu(current)->fpstate->xfd); 964 } 965 966 /* 967 * Given an xstate feature nr, calculate where in the xsave 968 * buffer the state is. Callers should ensure that the buffer 969 * is valid. 970 */ 971 static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) 972 { 973 u64 xcomp_bv = xsave->header.xcomp_bv; 974 975 if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) 976 return NULL; 977 978 if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) { 979 if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr)))) 980 return NULL; 981 } 982 983 return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr); 984 } 985 986 /* 987 * Given the xsave area and a state inside, this function returns the 988 * address of the state. 989 * 990 * This is the API that is called to get xstate address in either 991 * standard format or compacted format of xsave area. 992 * 993 * Note that if there is no data for the field in the xsave buffer 994 * this will return NULL. 995 * 996 * Inputs: 997 * xstate: the thread's storage area for all FPU data 998 * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, 999 * XFEATURE_SSE, etc...) 1000 * Output: 1001 * address of the state in the xsave area, or NULL if the 1002 * field is not present in the xsave buffer. 1003 */ 1004 void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) 1005 { 1006 /* 1007 * Do we even *have* xsave state? 1008 */ 1009 if (!boot_cpu_has(X86_FEATURE_XSAVE)) 1010 return NULL; 1011 1012 /* 1013 * We should not ever be requesting features that we 1014 * have not enabled. 1015 */ 1016 if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr))) 1017 return NULL; 1018 1019 /* 1020 * This assumes the last 'xsave*' instruction to 1021 * have requested that 'xfeature_nr' be saved. 1022 * If it did not, we might be seeing and old value 1023 * of the field in the buffer. 1024 * 1025 * This can happen because the last 'xsave' did not 1026 * request that this feature be saved (unlikely) 1027 * or because the "init optimization" caused it 1028 * to not be saved. 1029 */ 1030 if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr))) 1031 return NULL; 1032 1033 return __raw_xsave_addr(xsave, xfeature_nr); 1034 } 1035 EXPORT_SYMBOL_GPL(get_xsave_addr); 1036 1037 /* 1038 * Given an xstate feature nr, calculate where in the xsave buffer the state is. 1039 * The xsave buffer should be in standard format, not compacted (e.g. user mode 1040 * signal frames). 
/*
 * Given an xstate feature nr, calculate where in the xsave buffer the state is.
 * The xsave buffer should be in standard format, not compacted (e.g. user mode
 * signal frames).
 */
void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_nr)
{
	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
		return NULL;

	return (void __user *)xsave + xstate_offsets[xfeature_nr];
}

#ifdef CONFIG_ARCH_HAS_PKEYS

/*
 * This will go out and modify PKRU register to set the access
 * rights for @pkey to @init_val.
 */
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
			      unsigned long init_val)
{
	u32 old_pkru, new_pkru_bits = 0;
	int pkey_shift;

	/*
	 * This check implies XSAVE support.  OSPKE only gets
	 * set if we enable XSAVE and we enable PKU in XCR0.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
		return -EINVAL;

	/*
	 * This code should only be called with valid 'pkey'
	 * values originating from in-kernel users.  Complain
	 * if a bad value is observed.
	 */
	if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
		return -EINVAL;

	/* Set the bits we need in PKRU:  */
	if (init_val & PKEY_DISABLE_ACCESS)
		new_pkru_bits |= PKRU_AD_BIT;
	if (init_val & PKEY_DISABLE_WRITE)
		new_pkru_bits |= PKRU_WD_BIT;

	/* Shift the bits in to the correct place in PKRU for pkey: */
	pkey_shift = pkey * PKRU_BITS_PER_PKEY;
	new_pkru_bits <<= pkey_shift;

	/* Get old PKRU and mask off any old bits in place: */
	old_pkru = read_pkru();
	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);

	/* Write old part along with new part: */
	write_pkru(old_pkru | new_pkru_bits);

	return 0;
}
#endif /* CONFIG_ARCH_HAS_PKEYS */
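
/*
 * Worked example for the PKRU bit math above: each pkey owns two bits
 * (PKRU_BITS_PER_PKEY == 2), AD in the low bit and WD in the high bit.
 * For pkey == 1 and init_val == PKEY_DISABLE_WRITE:
 *
 *	pkey_shift    = 1 * 2 = 2
 *	new_pkru_bits = PKRU_WD_BIT << 2 = 0x8
 *
 * so only bit 3 of PKRU changes and all other keys keep their previous
 * access rights.
 */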
static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
			 void *init_xstate, unsigned int size)
{
	membuf_write(to, from_xstate ? xstate : init_xstate, size);
}

/**
 * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @fpstate:	The fpstate buffer from which to copy
 * @xfeatures:	The mask of xfeatures to save (XSAVE mode only)
 * @pkru_val:	The PKRU value to store in the PKRU component
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * It supports partial copy but @to.pos always starts from zero.
 */
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
			       u64 xfeatures, u32 pkru_val,
			       enum xstate_copy_mode copy_mode)
{
	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
	struct xregs_state *xinit = &init_fpstate.regs.xsave;
	struct xregs_state *xsave = &fpstate->regs.xsave;
	unsigned int zerofrom, i, xfeature;
	struct xstate_header header;
	u64 mask;

	memset(&header, 0, sizeof(header));
	header.xfeatures = xsave->header.xfeatures;

	/* Mask out the feature bits depending on copy mode */
	switch (copy_mode) {
	case XSTATE_COPY_FP:
		header.xfeatures &= XFEATURE_MASK_FP;
		break;

	case XSTATE_COPY_FX:
		header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
		break;

	case XSTATE_COPY_XSAVE:
		header.xfeatures &= fpstate->user_xfeatures & xfeatures;
		break;
	}

	/* Copy FP state up to MXCSR */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
		     &xinit->i387, off_mxcsr);

	/* Copy MXCSR when SSE or YMM are set in the feature mask */
	copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
		     &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
		     MXCSR_AND_FLAGS_SIZE);

	/* Copy the remaining FP state */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP,
		     &to, &xsave->i387.st_space, &xinit->i387.st_space,
		     sizeof(xsave->i387.st_space));

	/* Copy the SSE state - shared with YMM, but independently managed */
	copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
		     &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
		     sizeof(xsave->i387.xmm_space));

	if (copy_mode != XSTATE_COPY_XSAVE)
		goto out;

	/* Zero the padding area */
	membuf_zero(&to, sizeof(xsave->i387.padding));

	/* Copy xsave->i387.sw_reserved */
	membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));

	/* Copy the user space relevant state of @xsave->header */
	membuf_write(&to, &header, sizeof(header));

	zerofrom = offsetof(struct xregs_state, extended_state_area);

	/*
	 * This 'mask' indicates which states to copy from fpstate.
	 * Those extended states that are not present in fpstate are
	 * either disabled or initialized:
	 *
	 * In non-compacted format, disabled features still occupy
	 * state space but there is no state to copy from in the
	 * compacted init_fpstate. The gap tracking will zero these
	 * states.
	 *
	 * The extended features have an all zeroes init state. Thus,
	 * remove them from 'mask' to zero those features in the user
	 * buffer instead of retrieving them from init_fpstate.
	 */
	mask = header.xfeatures;

	for_each_extended_xfeature_in_order(i, mask) {
		xfeature = xfeature_uncompact_order[i];
		/*
		 * If there was a feature or alignment gap, zero the space
		 * in the destination buffer.
		 */
		if (zerofrom < xstate_offsets[xfeature])
			membuf_zero(&to, xstate_offsets[xfeature] - zerofrom);

		if (xfeature == XFEATURE_PKRU) {
			struct pkru_state pkru = {0};
			/*
			 * PKRU is not necessarily up to date in the
			 * XSAVE buffer. Use the provided value.
			 */
			pkru.pkru = pkru_val;
			membuf_write(&to, &pkru, sizeof(pkru));
		} else {
			membuf_write(&to,
				     __raw_xsave_addr(xsave, xfeature),
				     xstate_sizes[xfeature]);
		}
		/*
		 * Keep track of the last copied state in the non-compacted
		 * target buffer for gap zeroing.
		 */
		zerofrom = xstate_offsets[xfeature] + xstate_sizes[xfeature];
	}

out:
	if (to.left)
		membuf_zero(&to, to.left);
}
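
/*
 * Illustration of the gap zeroing above (hypothetical header contents):
 * if header.xfeatures has SSE and ZMM_Hi256 set but YMM and OPMASK are in
 * init state, the loop copies nothing for YMM/OPMASK; instead the bytes
 * between the end of the legacy+header region and xstate_offsets[ZMM_Hi256]
 * are cleared via membuf_zero(), so the UABI buffer still looks like a
 * fully populated standard-format XSAVE image.
 */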
/**
 * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @tsk:	The task from which to copy the saved xstate
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * It supports partial copy but @to.pos always starts from zero.
 */
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
			     enum xstate_copy_mode copy_mode)
{
	__copy_xstate_to_uabi_buf(to, x86_task_fpu(tsk)->fpstate,
				  x86_task_fpu(tsk)->fpstate->user_xfeatures,
				  tsk->thread.pkru, copy_mode);
}

static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
			    const void *kbuf, const void __user *ubuf)
{
	if (kbuf) {
		memcpy(dst, kbuf + offset, size);
	} else {
		if (copy_from_user(dst, ubuf + offset, size))
			return -EFAULT;
	}
	return 0;
}


/**
 * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
 * @fpstate:	The fpstate buffer to copy to
 * @kbuf:	The UABI format buffer, if it comes from the kernel
 * @ubuf:	The UABI format buffer, if it comes from userspace
 * @pkru:	The location to write the PKRU value to
 *
 * Converts from the UABI format into the kernel internal hardware
 * dependent format.
 *
 * This function ultimately has three different callers with distinct PKRU
 * behavior.
 * 1.	When called from sigreturn the PKRU register will be restored from
 *	@fpstate via an XRSTOR. Correctly copying the UABI format buffer to
 *	@fpstate is sufficient to cover this case, but the caller will also
 *	pass a pointer to the thread_struct's pkru field in @pkru and updating
 *	it is harmless.
 * 2.	When called from ptrace the PKRU register will be restored from the
 *	thread_struct's pkru field. A pointer to that is passed in @pkru.
 *	The kernel will restore it manually, so the XRSTOR behavior that resets
 *	the PKRU register to the hardware init value (0) if the corresponding
 *	xfeatures bit is not set is emulated here.
 * 3.	When called from KVM the PKRU register will be restored from the vcpu's
 *	pkru field. A pointer to that is passed in @pkru. KVM hasn't used
 *	XRSTOR and hasn't had the PKRU resetting behavior described above. To
 *	preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
 *	bit is not set.
 */
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
			       const void __user *ubuf, u32 *pkru)
{
	struct xregs_state *xsave = &fpstate->regs.xsave;
	unsigned int offset, size;
	struct xstate_header hdr;
	u64 mask;
	int i;

	offset = offsetof(struct xregs_state, header);
	if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
		return -EFAULT;

	if (validate_user_xstate_header(&hdr, fpstate))
		return -EINVAL;

	/* Validate MXCSR when any of the related features is in use */
	mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
	if (hdr.xfeatures & mask) {
		u32 mxcsr[2];

		offset = offsetof(struct fxregs_state, mxcsr);
		if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
			return -EFAULT;

		/* Reserved bits in MXCSR must be zero. */
		if (mxcsr[0] & ~mxcsr_feature_mask)
			return -EINVAL;

		/* SSE and YMM require MXCSR even when FP is not in use. */
		if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
			xsave->i387.mxcsr = mxcsr[0];
			xsave->i387.mxcsr_mask = mxcsr[1];
		}
	}

	for (i = 0; i < XFEATURE_MAX; i++) {
		mask = BIT_ULL(i);

		if (hdr.xfeatures & mask) {
			void *dst = __raw_xsave_addr(xsave, i);

			offset = xstate_offsets[i];
			size = xstate_sizes[i];

			if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
				return -EFAULT;
		}
	}

	if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
		struct pkru_state *xpkru;

		xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
		*pkru = xpkru->pkru;
	} else {
		/*
		 * KVM may pass NULL here to indicate that it does not need
		 * PKRU updated.
		 */
		if (pkru)
			*pkru = 0;
	}

	/*
	 * The state that came in from userspace was user-state only.
	 * Mask all the user states out of 'xfeatures':
	 */
	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;

	/*
	 * Add back in the features that came in from userspace:
	 */
	xsave->header.xfeatures |= hdr.xfeatures;

	return 0;
}

/*
 * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
 * format and copy to the target thread. Used by ptrace and KVM.
 */
int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
{
	return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
}

/*
 * Convert from a sigreturn standard-format user-space buffer to kernel
 * XSAVE[S] format and copy to the target thread. This is called from the
 * sigreturn() and rt_sigreturn() system calls.
 */
int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
				      const void __user *ubuf)
{
	return copy_uabi_to_xstate(x86_task_fpu(tsk)->fpstate, NULL, ubuf, &tsk->thread.pkru);
}

static bool validate_independent_components(u64 mask)
{
	u64 xchk;

	if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
		return false;

	xchk = ~xfeatures_mask_independent();

	if (WARN_ON_ONCE(!mask || mask & xchk))
		return false;

	return true;
}

/**
 * xsaves - Save selected components to a kernel xstate buffer
 * @xstate:	Pointer to the buffer
 * @mask:	Feature mask to select the components to save
 *
 * The @xstate buffer must be 64 byte aligned and correctly initialized as
 * XSAVES does not write the full xstate header. Before first use the
 * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
 * can #GP.
 *
 * The feature mask must be a subset of the independent features.
 */
void xsaves(struct xregs_state *xstate, u64 mask)
{
	int err;

	if (!validate_independent_components(mask))
		return;

	XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
	WARN_ON_ONCE(err);
}

/**
 * xrstors - Restore selected components from a kernel xstate buffer
 * @xstate:	Pointer to the buffer
 * @mask:	Feature mask to select the components to restore
 *
 * The @xstate buffer must be 64 byte aligned and correctly initialized
 * otherwise XRSTORS from that buffer can #GP.
 *
 * Proper usage is to restore the state which was saved with
 * xsaves() into @xstate.
 *
 * The feature mask must be a subset of the independent features.
 */
void xrstors(struct xregs_state *xstate, u64 mask)
{
	int err;

	if (!validate_independent_components(mask))
		return;

	XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
	WARN_ON_ONCE(err);
}
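
/*
 * Usage sketch for the two helpers above (assuming the caller owns a
 * suitably sized, 64-byte aligned and zero-initialized buffer for one
 * independent component, e.g. the arch LBR state used by perf):
 *
 *	xsaves(buf, XFEATURE_MASK_LBR);		// save on context-out
 *	...
 *	xrstors(buf, XFEATURE_MASK_LBR);	// restore on context-in
 *
 * Any mask bit outside xfeatures_mask_independent() is rejected by
 * validate_independent_components().
 */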
#if IS_ENABLED(CONFIG_KVM)
void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature)
{
	void *addr = get_xsave_addr(&fpstate->regs.xsave, xfeature);

	if (addr)
		memset(addr, 0, xstate_sizes[xfeature]);
}
EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
#endif

#ifdef CONFIG_X86_64

#ifdef CONFIG_X86_DEBUG_FPU
/*
 * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
 * can safely operate on the @fpstate buffer.
 */
static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
{
	u64 xfd = __this_cpu_read(xfd_state);

	if (fpstate->xfd == xfd)
		return true;

	/*
	 * The XFD MSR does not match fpstate->xfd. That's invalid when
	 * the passed in fpstate is current's fpstate.
	 */
	if (fpstate->xfd == x86_task_fpu(current)->fpstate->xfd)
		return false;

	/*
	 * XRSTOR(S) from init_fpstate are always correct as it will just
	 * bring all components into init state and not read from the
	 * buffer. XSAVE(S) raises #PF after init.
	 */
	if (fpstate == &init_fpstate)
		return rstor;

	/*
	 * XSAVE(S): clone(), fpu_swap_kvm_fpstate()
	 * XRSTORS(S): fpu_swap_kvm_fpstate()
	 */

	/*
	 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
	 * the buffer area for XFD-disabled state components.
	 */
	mask &= ~xfd;

	/*
	 * Remove features which are valid in fpstate. They
	 * have space allocated in fpstate.
	 */
	mask &= ~fpstate->xfeatures;

	/*
	 * Any remaining state components in 'mask' might be written
	 * by XSAVE/XRSTOR. Fail validation if found.
	 */
	return !mask;
}

void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
{
	WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
}
#endif /* CONFIG_X86_DEBUG_FPU */

static int __init xfd_update_static_branch(void)
{
	/*
	 * If init_fpstate.xfd has bits set then dynamic features are
	 * available and the dynamic sizing must be enabled.
	 */
	if (init_fpstate.xfd)
		static_branch_enable(&__fpu_state_size_dynamic);
	return 0;
}
arch_initcall(xfd_update_static_branch)

void fpstate_free(struct fpu *fpu)
{
	if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
		vfree(fpu->fpstate);
}

/**
 * fpstate_realloc - Reallocate struct fpstate for the requested new features
 *
 * @xfeatures:	A bitmap of xstate features which extend the enabled features
 *		of that task
 * @ksize:	The required size for the kernel buffer
 * @usize:	The required size for user space buffers
 * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
 *
 * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
 * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
 * with large states are likely to live longer.
 *
 * Returns: 0 on success, -ENOMEM on allocation error.
 */
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
			   unsigned int usize, struct fpu_guest *guest_fpu)
{
	struct fpu *fpu = x86_task_fpu(current);
	struct fpstate *curfps, *newfps = NULL;
	unsigned int fpsize;
	bool in_use;

	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);

	newfps = vzalloc(fpsize);
	if (!newfps)
		return -ENOMEM;
	newfps->size = ksize;
	newfps->user_size = usize;
	newfps->is_valloc = true;

	/*
	 * When a guest FPU is supplied, use @guest_fpu->fpstate
	 * as reference, independent of whether it is in use or not.
	 */
	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;

	/* Determine whether @curfps is the active fpstate */
	in_use = fpu->fpstate == curfps;

	if (guest_fpu) {
		newfps->is_guest = true;
		newfps->is_confidential = curfps->is_confidential;
		newfps->in_use = curfps->in_use;
		guest_fpu->xfeatures |= xfeatures;
		guest_fpu->uabi_size = usize;
	}

	fpregs_lock();
	/*
	 * If @curfps is in use, ensure that the current state is in the
	 * registers before swapping fpstate as that might invalidate it
	 * due to layout changes.
	 */
	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
		fpregs_restore_userregs();

	newfps->xfeatures = curfps->xfeatures | xfeatures;
	newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
	newfps->xfd = curfps->xfd & ~xfeatures;

	/* Do the final updates within the locked region */
	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);

	if (guest_fpu) {
		guest_fpu->fpstate = newfps;
		/* If curfps is active, update the FPU fpstate pointer */
		if (in_use)
			fpu->fpstate = newfps;
	} else {
		fpu->fpstate = newfps;
	}

	if (in_use)
		xfd_update_state(fpu->fpstate);
	fpregs_unlock();

	/* Only free valloc'ed state */
	if (curfps && curfps->is_valloc)
		vfree(curfps);

	return 0;
}

static int validate_sigaltstack(unsigned int usize)
{
	struct task_struct *thread, *leader = current->group_leader;
	unsigned long framesize = get_sigframe_size();

	lockdep_assert_held(&current->sighand->siglock);

	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
	framesize -= fpu_user_cfg.max_size;
	framesize += usize;
	for_each_thread(leader, thread) {
		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
			return -ENOSPC;
	}
	return 0;
}

static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
	/*
	 * This deliberately does not exclude !XSAVES as we still might
	 * decide to optionally context switch XCR0 or talk the silicon
	 * vendors into extending XFD for the pre AMX states, especially
	 * AVX512.
	 */
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	struct fpu *fpu = x86_task_fpu(current->group_leader);
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	u64 mask;
	int ret = 0;

	/* Check whether fully enabled */
	if ((permitted & requested) == requested)
		return 0;

	/*
	 * Calculate the resulting kernel state size. Note, @permitted also
	 * contains supervisor xfeatures even though supervisor xfeatures are
	 * always permitted for kernel and guest FPUs, and never permitted
	 * for user FPUs.
	 */
	mask = permitted | requested;
	ksize = xstate_calculate_size(mask, compacted);

	/*
	 * Calculate the resulting user state size. Take care not to clobber
	 * the supervisor xfeatures in the new mask!
	 */
	usize = xstate_calculate_size(mask & XFEATURE_MASK_USER_SUPPORTED, false);

	if (!guest) {
		ret = validate_sigaltstack(usize);
		if (ret)
			return ret;
	}

	perm = guest ? &fpu->guest_perm : &fpu->perm;
	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
	WRITE_ONCE(perm->__state_perm, mask);
	/* Protected by sighand lock */
	perm->__state_size = ksize;
	perm->__user_state_size = usize;
	return ret;
}

/*
 * Permissions array to map facilities with more than one component
 */
static const u64 xstate_prctl_req[XFEATURE_MAX] = {
	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};

static int xstate_request_perm(unsigned long idx, bool guest)
{
	u64 permitted, requested;
	int ret;

	if (idx >= XFEATURE_MAX)
		return -EINVAL;

	/*
	 * Look up the facility mask which can require more than
	 * one xstate component.
	 */
	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
	requested = xstate_prctl_req[idx];
	if (!requested)
		return -EOPNOTSUPP;

	if ((fpu_user_cfg.max_features & requested) != requested)
		return -EOPNOTSUPP;

	/* Lockless quick check */
	permitted = xstate_get_group_perm(guest);
	if ((permitted & requested) == requested)
		return 0;

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);
	permitted = xstate_get_group_perm(guest);

	/* First vCPU allocation locks the permissions. */
	if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
		ret = -EBUSY;
	else
		ret = __xstate_request_perm(permitted, requested, guest);
	spin_unlock_irq(&current->sighand->siglock);
	return ret;
}

int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	struct fpu *fpu;

	if (!xfd_event) {
		if (!guest_fpu)
			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
		return 0;
	}

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);

	/* If not permitted let it die */
	if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
		spin_unlock_irq(&current->sighand->siglock);
		return -EPERM;
	}

	fpu = x86_task_fpu(current->group_leader);
	perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
	ksize = perm->__state_size;
	usize = perm->__user_state_size;

	/*
	 * The feature is permitted. State size is sufficient.  Dropping
	 * the lock is safe here even if more features are added from
	 * another task, the retrieved buffer sizes are valid for the
	 * currently requested feature(s).
	 */
	spin_unlock_irq(&current->sighand->siglock);

	/*
	 * Try to allocate a new fpstate. If that fails there is no way
	 * out.
	 */
	if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
		return -EFAULT;
	return 0;
}

int xfd_enable_feature(u64 xfd_err)
{
	return __xfd_enable_feature(xfd_err, NULL);
}

#else /* CONFIG_X86_64 */
static inline int xstate_request_perm(unsigned long idx, bool guest)
{
	return -EPERM;
}
#endif  /* !CONFIG_X86_64 */

u64 xstate_get_guest_group_perm(void)
{
	return xstate_get_group_perm(true);
}
EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);

/**
 * fpu_xstate_prctl - xstate permission operations
 * @option:	A subfunction of arch_prctl()
 * @arg2:	option argument
 * Return:	0 if successful; otherwise, an error code
 *
 * Option arguments:
 *
 * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
 * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
 * ARCH_REQ_XCOMP_PERM: Facility number requested
 *
 * For facilities which require more than one XSTATE component, the request
 * must be the highest state component number related to that facility,
 * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
 * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
 */
long fpu_xstate_prctl(int option, unsigned long arg2)
{
	u64 __user *uptr = (u64 __user *)arg2;
	u64 permitted, supported;
	unsigned long idx = arg2;
	bool guest = false;

	switch (option) {
	case ARCH_GET_XCOMP_SUPP:
		supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
		return put_user(supported, uptr);

	case ARCH_GET_XCOMP_PERM:
		/*
		 * Lockless snapshot as it can also change right after
		 * dropping the lock.
		 */
		permitted = xstate_get_host_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_GET_XCOMP_GUEST_PERM:
		permitted = xstate_get_guest_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_REQ_XCOMP_GUEST_PERM:
		guest = true;
		fallthrough;

	case ARCH_REQ_XCOMP_PERM:
		if (!IS_ENABLED(CONFIG_X86_64))
			return -EOPNOTSUPP;

		return xstate_request_perm(idx, guest);

	default:
		return -EINVAL;
	}
}
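
/*
 * Userspace usage sketch for the prctl above (simplified, error handling
 * omitted): before touching AMX tile registers a thread group requests
 * permission once, e.g.
 *
 *	arch_prctl(ARCH_REQ_XCOMP_PERM, XFEATURE_XTILE_DATA);
 *
 * after which the first actual tile usage triggers #NM, the XFD handler
 * calls xfd_enable_feature() and the task's fpstate is reallocated to
 * hold the dynamic state.
 */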
#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
 * Report the amount of time elapsed in milliseconds since last AVX512
 * use in the task.
 */
static void avx512_status(struct seq_file *m, struct task_struct *task)
{
	unsigned long timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp);
	long delta;

	if (!timestamp) {
		/*
		 * Report -1 if no AVX512 usage
		 */
		delta = -1;
	} else {
		delta = (long)(jiffies - timestamp);
		/*
		 * Cap to LONG_MAX if time difference > LONG_MAX
		 */
		if (delta < 0)
			delta = LONG_MAX;
		delta = jiffies_to_msecs(delta);
	}

	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
	seq_putc(m, '\n');
}

/*
 * Report architecture specific information
 */
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
			 struct pid *pid, struct task_struct *task)
{
	/*
	 * Report AVX512 state if the processor and the build option support it.
	 */
	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
		avx512_status(m, task);

	return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */

#ifdef CONFIG_COREDUMP
static const char owner_name[] = "LINUX";

/*
 * Dump type, size, offset and flag values for every xfeature that is present.
 */
static int dump_xsave_layout_desc(struct coredump_params *cprm)
{
	int num_records = 0;
	int i;

	for_each_extended_xfeature(i, fpu_user_cfg.max_features) {
		struct x86_xfeat_component xc = {
			.type   = i,
			.size   = xstate_sizes[i],
			.offset = xstate_offsets[i],
			/* reserved for future use */
			.flags  = 0,
		};

		if (!dump_emit(cprm, &xc, sizeof(xc)))
			return 0;

		num_records++;
	}
	return num_records;
}

static u32 get_xsave_desc_size(void)
{
	u32 cnt = 0;
	u32 i;

	for_each_extended_xfeature(i, fpu_user_cfg.max_features)
		cnt++;

	return cnt * (sizeof(struct x86_xfeat_component));
}

int elf_coredump_extra_notes_write(struct coredump_params *cprm)
{
	int num_records = 0;
	struct elf_note en;

	if (!fpu_user_cfg.max_features)
		return 0;

	en.n_namesz = sizeof(owner_name);
	en.n_descsz = get_xsave_desc_size();
	en.n_type = NT_X86_XSAVE_LAYOUT;

	if (!dump_emit(cprm, &en, sizeof(en)))
		return 1;
	if (!dump_emit(cprm, owner_name, en.n_namesz))
		return 1;
	if (!dump_align(cprm, 4))
		return 1;

	num_records = dump_xsave_layout_desc(cprm);
	if (!num_records)
		return 1;

	/* Total size should be equal to the number of records */
	if ((sizeof(struct x86_xfeat_component) * num_records) != en.n_descsz)
		return 1;

	return 0;
}

int elf_coredump_extra_notes_size(void)
{
	int size;

	if (!fpu_user_cfg.max_features)
		return 0;

	/* .note header */
	size  = sizeof(struct elf_note);
	/* Name plus alignment to 4 bytes */
	size += roundup(sizeof(owner_name), 4);
	size += get_xsave_desc_size();

	return size;
}
#endif /* CONFIG_COREDUMP */