/*
 * xsave/xrstor support.
 *
 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
 */
#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/pkeys.h>

#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>

#include <asm/tlbflush.h>

/*
 * Although we spell it out in here, the Processor Trace
 * xfeature is completely unused. We use other mechanisms
 * to save/restore PT state in Linux.
 */
static const char *xfeature_names[] =
{
	"x87 floating point registers"	,
	"SSE registers"			,
	"AVX registers"			,
	"MPX bounds registers"		,
	"MPX CSR"			,
	"AVX-512 opmask"		,
	"AVX-512 Hi256"			,
	"AVX-512 ZMM_Hi256"		,
	"Processor Trace (unused)"	,
	"Protection Keys User registers",
	"unknown xstate feature"	,
};

/*
 * Mask of xstate features supported by the CPU and the kernel:
 */
u64 xfeatures_mask __read_mostly;

static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX]   = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];

/*
 * Clear all of the X86_FEATURE_* bits that are unavailable
 * when the CPU has no XSAVE support.
 */
void fpu__xstate_clear_all_cpu_caps(void)
{
	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
	setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
	setup_clear_cpu_cap(X86_FEATURE_AVX);
	setup_clear_cpu_cap(X86_FEATURE_AVX2);
	setup_clear_cpu_cap(X86_FEATURE_AVX512F);
	setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
	setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
	setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
	setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
	setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
	setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
	setup_clear_cpu_cap(X86_FEATURE_MPX);
	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
	setup_clear_cpu_cap(X86_FEATURE_PKU);
}

/*
 * Return whether the system supports a given xfeature.
 *
 * Also return the name of the (most advanced) feature that the caller requested:
 */
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
	u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask;

	if (unlikely(feature_name)) {
		long xfeature_idx, max_idx;
		u64 xfeatures_print;
		/*
		 * We use fls64() here so that we report the most advanced
		 * feature that was requested but is missing. If a driver
		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we
		 * print the missing AVX feature - this is the most
		 * informative message to users:
		 */
		if (xfeatures_missing)
			xfeatures_print = xfeatures_missing;
		else
			xfeatures_print = xfeatures_needed;

		xfeature_idx = fls64(xfeatures_print)-1;
		max_idx = ARRAY_SIZE(xfeature_names)-1;
		xfeature_idx = min(xfeature_idx, max_idx);

		*feature_name = xfeature_names[xfeature_idx];
	}

	if (xfeatures_missing)
		return 0;

	return 1;
}
EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
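
/*
 * Usage sketch (hypothetical caller, for illustration only): a driver
 * that requires AVX state management could probe for it like this:
 *
 *	const char *name;
 *
 *	if (!cpu_has_xfeatures(XFEATURE_MASK_YMM, &name))
 *		pr_err("mydrv: required xfeature '%s' missing\n", name);
 */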

/*
 * When executing XSAVEOPT (or other optimized XSAVE instructions), if
 * a processor implementation detects that an FPU state component is still
 * (or is again) in its initialized state, it may clear the corresponding
 * bit in the header.xfeatures field, and can skip the writeout of registers
 * to the corresponding memory layout.
 *
 * This means that when the bit is zero, the state component might still
 * contain stale, non-initialized register state from an earlier save.
 *
 * Before writing xstate information to user-space we sanitize those
 * components, to ensure that the memory layout of a feature is in its
 * init state if the corresponding header bit is zero. This way
 * user-space does not see stale state in the memory layout during
 * signal handling, debugging etc.
 */
void fpstate_sanitize_xstate(struct fpu *fpu)
{
	struct fxregs_state *fx = &fpu->state.fxsave;
	int feature_bit;
	u64 xfeatures;

	if (!use_xsaveopt())
		return;

	xfeatures = fpu->state.xsave.header.xfeatures;

	/*
	 * None of the feature bits are in init state. So nothing else
	 * to do for us, as the memory layout is up to date.
	 */
	if ((xfeatures & xfeatures_mask) == xfeatures_mask)
		return;

	/*
	 * FP is in init state
	 */
	if (!(xfeatures & XFEATURE_MASK_FP)) {
		fx->cwd = 0x37f;
		fx->swd = 0;
		fx->twd = 0;
		fx->fop = 0;
		fx->rip = 0;
		fx->rdp = 0;
		memset(&fx->st_space[0], 0, 128);
	}

	/*
	 * SSE is in init state
	 */
	if (!(xfeatures & XFEATURE_MASK_SSE))
		memset(&fx->xmm_space[0], 0, 256);

	/*
	 * First two features are FPU and SSE, which above we handled
	 * in a special way already:
	 */
	feature_bit = 0x2;
	xfeatures = (xfeatures_mask & ~xfeatures) >> 2;

	/*
	 * Update all the remaining memory layouts according to their
	 * standard xstate layout, if their header bit is in the init
	 * state:
	 */
	while (xfeatures) {
		if (xfeatures & 0x1) {
			int offset = xstate_offsets[feature_bit];
			int size = xstate_sizes[feature_bit];

			memcpy((void *)fx + offset,
			       (void *)&init_fpstate.xsave + offset,
			       size);
		}

		xfeatures >>= 1;
		feature_bit++;
	}
}

/*
 * Enable the extended processor state save/restore feature.
 * Called once per CPU onlining.
 */
void fpu__init_cpu_xstate(void)
{
	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
		return;

	cr4_set_bits(X86_CR4_OSXSAVE);
	xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
}

/*
 * Note that in the future we will likely need a pair of
 * functions here: one for user xstates and the other for
 * system xstates. For now, they are the same.
 */
static int xfeature_enabled(enum xfeature xfeature)
{
	return !!(xfeatures_mask & (1UL << xfeature));
}

/*
 * Record the offsets and sizes of various xstates contained
 * in the XSAVE state memory layout.
 */
static void __init setup_xstate_features(void)
{
	u32 eax, ebx, ecx, edx, i;
	/* start at the beginning of the "extended state" */
	unsigned int last_good_offset = offsetof(struct xregs_state,
						 extended_state_area);

	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
		if (!xfeature_enabled(i))
			continue;

		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
		xstate_offsets[i] = ebx;
		xstate_sizes[i] = eax;
		/*
		 * In our xstate size checks, we assume that the
		 * highest-numbered xstate feature has the
		 * highest offset in the buffer. Ensure it does.
		 */
		WARN_ONCE(last_good_offset > xstate_offsets[i],
			  "x86/fpu: misordered xstate at %d\n", last_good_offset);
		last_good_offset = xstate_offsets[i];

		printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, ebx, i, eax);
	}
}
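
/*
 * Illustration (typical values, not guaranteed): on a CPU with AVX
 * enabled, CPUID(0xD, 2) enumerates the YMM state component as 256
 * bytes at standard-format offset 576, so the loop above would log:
 *
 *	x86/fpu: xstate_offset[2]:  576, xstate_sizes[2]:  256
 */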

static void __init print_xstate_feature(u64 xstate_mask)
{
	const char *feature_name;

	if (cpu_has_xfeatures(xstate_mask, &feature_name))
		pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
}

/*
 * Print out all the supported xstate features:
 */
static void __init print_xstate_features(void)
{
	print_xstate_feature(XFEATURE_MASK_FP);
	print_xstate_feature(XFEATURE_MASK_SSE);
	print_xstate_feature(XFEATURE_MASK_YMM);
	print_xstate_feature(XFEATURE_MASK_BNDREGS);
	print_xstate_feature(XFEATURE_MASK_BNDCSR);
	print_xstate_feature(XFEATURE_MASK_OPMASK);
	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
	print_xstate_feature(XFEATURE_MASK_PKRU);
}

/*
 * This function sets up offsets and sizes of all extended states in
 * the xsave area. This supports both the standard format and the
 * compacted format of the xsave area.
 */
static void __init setup_xstate_comp(void)
{
	unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8];
	int i;

	/*
	 * The FP xstates and SSE xstates are legacy states. They are always
	 * in the fixed offsets in the xsave area in either compacted form
	 * or standard form.
	 */
	xstate_comp_offsets[0] = 0;
	xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);

	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
		for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
			if (xfeature_enabled(i)) {
				xstate_comp_offsets[i] = xstate_offsets[i];
				xstate_comp_sizes[i] = xstate_sizes[i];
			}
		}
		return;
	}

	xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
		FXSAVE_SIZE + XSAVE_HDR_SIZE;

	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
		if (xfeature_enabled(i))
			xstate_comp_sizes[i] = xstate_sizes[i];
		else
			xstate_comp_sizes[i] = 0;

		if (i > FIRST_EXTENDED_XFEATURE)
			xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
					       + xstate_comp_sizes[i-1];
	}
}
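
/*
 * Worked example (illustrative only): with XSAVES in use and
 * xfeatures_mask = 0x7 (FP | SSE | YMM), the first extended feature
 * (YMM, 256 bytes) lands at FXSAVE_SIZE + XSAVE_HDR_SIZE =
 * 512 + 64 = 576, and a hypothetical next enabled feature would
 * start at 576 + 256 = 832. Note that this function only sums
 * sizes; the 64-byte alignment bit from CPUID ECX[1] is checked
 * separately in xfeature_is_aligned() below.
 */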

/*
 * Setup the xstate image representing the init state:
 */
static void __init setup_init_fpu_buf(void)
{
	static int on_boot_cpu __initdata = 1;

	WARN_ON_FPU(!on_boot_cpu);
	on_boot_cpu = 0;

	if (!boot_cpu_has(X86_FEATURE_XSAVE))
		return;

	setup_xstate_features();
	print_xstate_features();

	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
		init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
		init_fpstate.xsave.header.xfeatures = xfeatures_mask;
	}

	/*
	 * Init all the features state with header.xfeatures being 0x0:
	 */
	copy_kernel_to_xregs_booting(&init_fpstate.xsave);

	/*
	 * Dump the init state again. This is to identify the init state
	 * of any feature which is not represented by all zero's.
	 */
	copy_xregs_to_kernel_booting(&init_fpstate.xsave);
}

static int xfeature_is_supervisor(int xfeature_nr)
{
	/*
	 * We currently do not support supervisor states, but if
	 * we did, we could find out like this.
	 *
	 * SDM says: If state component 'i' is a user state component,
	 * ECX[0] returns 0; if state component 'i' is a supervisor
	 * state component, ECX[0] returns 1.
	 *
	 *	u32 eax, ebx, ecx, edx;
	 *	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
	 *	return !!(ecx & 1);
	 */
	return 0;
}
/*
static int xfeature_is_user(int xfeature_nr)
{
	return !xfeature_is_supervisor(xfeature_nr);
}
*/

/*
 * This check is important because it is easy to get XSTATE_*
 * confused with XSTATE_BIT_*.
 */
#define CHECK_XFEATURE(nr) do {			\
	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
	WARN_ON(nr >= XFEATURE_MAX);		\
} while (0)

/*
 * We could cache this like xstate_size[], but we only use
 * it here, so it would be a waste of space.
 */
static int xfeature_is_aligned(int xfeature_nr)
{
	u32 eax, ebx, ecx, edx;

	CHECK_XFEATURE(xfeature_nr);
	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
	/*
	 * The value returned by ECX[1] indicates the alignment
	 * of state component 'i' when the compacted format
	 * of the extended region of an XSAVE area is used:
	 */
	return !!(ecx & 2);
}

static int xfeature_uncompacted_offset(int xfeature_nr)
{
	u32 eax, ebx, ecx, edx;

	CHECK_XFEATURE(xfeature_nr);
	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
	return ebx;
}

static int xfeature_size(int xfeature_nr)
{
	u32 eax, ebx, ecx, edx;

	CHECK_XFEATURE(xfeature_nr);
	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
	return eax;
}

/*
 * 'XSAVES' implies two different things:
 * 1. saving of supervisor/system state
 * 2. using the compacted format
 *
 * Use this function when dealing with the compacted format so
 * that it is obvious which aspect of 'XSAVES' is being handled
 * by the calling code.
 */
static int using_compacted_format(void)
{
	return boot_cpu_has(X86_FEATURE_XSAVES);
}

static void __xstate_dump_leaves(void)
{
	int i;
	u32 eax, ebx, ecx, edx;
	static int should_dump = 1;

	if (!should_dump)
		return;
	should_dump = 0;
	/*
	 * Dump out a few leaves past the ones that we support
	 * just in case there are some goodies up there:
	 */
	for (i = 0; i < XFEATURE_MAX + 10; i++) {
		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
			XSTATE_CPUID, i, eax, ebx, ecx, edx);
	}
}

#define XSTATE_WARN_ON(x) do {							\
	if (WARN_ONCE(x, "XSAVE consistency problem, dumping leaves")) {	\
		__xstate_dump_leaves();						\
	}									\
} while (0)

#define XCHECK_SZ(sz, nr, nr_macro, __struct) do {			\
	if ((nr == nr_macro) &&						\
	    WARN_ONCE(sz != sizeof(__struct),				\
		"%s: struct is %zu bytes, cpu state %d bytes\n",	\
		__stringify(nr_macro), sizeof(__struct), sz)) {		\
		__xstate_dump_leaves();					\
	}								\
} while (0)
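
/*
 * For example, XCHECK_SZ(sz, nr, XFEATURE_YMM, struct ymmh_struct)
 * fires (and dumps the CPUID leaves) if the CPU enumerates a YMM
 * state size that differs from sizeof(struct ymmh_struct).
 */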

/*
 * We have a C struct for each 'xstate'. We need to ensure
 * that our software representation matches what the CPU
 * tells us about the state's size.
 */
static void check_xstate_against_struct(int nr)
{
	/*
	 * Ask the CPU for the size of the state.
	 */
	int sz = xfeature_size(nr);
	/*
	 * Match each CPU state with the corresponding software
	 * structure.
	 */
	XCHECK_SZ(sz, nr, XFEATURE_YMM,       struct ymmh_struct);
	XCHECK_SZ(sz, nr, XFEATURE_BNDREGS,   struct mpx_bndreg_state);
	XCHECK_SZ(sz, nr, XFEATURE_BNDCSR,    struct mpx_bndcsr_state);
	XCHECK_SZ(sz, nr, XFEATURE_OPMASK,    struct avx_512_opmask_state);
	XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
	XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM,  struct avx_512_hi16_state);
	XCHECK_SZ(sz, nr, XFEATURE_PKRU,      struct pkru_state);

	/*
	 * Make *SURE* to add any feature numbers below if
	 * there are "holes" in the xsave state component
	 * numbers.
	 */
	if ((nr < XFEATURE_YMM) ||
	    (nr >= XFEATURE_MAX) ||
	    (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) {
		WARN_ONCE(1, "no structure for xstate: %d\n", nr);
		XSTATE_WARN_ON(1);
	}
}

/*
 * This essentially double-checks what the CPU told us about
 * how large the XSAVE buffer needs to be. We are recalculating
 * it to be safe.
 */
static void do_extra_xstate_size_checks(void)
{
	int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	int i;

	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
		if (!xfeature_enabled(i))
			continue;

		check_xstate_against_struct(i);
		/*
		 * Supervisor state components can be managed only by
		 * XSAVES, which is compacted-format only.
		 */
		if (!using_compacted_format())
			XSTATE_WARN_ON(xfeature_is_supervisor(i));

		/* Align from the end of the previous feature */
		if (xfeature_is_aligned(i))
			paranoid_xstate_size = ALIGN(paranoid_xstate_size, 64);
		/*
		 * The offset of a given state in the non-compacted
		 * format is given to us in a CPUID leaf. We check
		 * them for being ordered (increasing offsets) in
		 * setup_xstate_features().
		 */
		if (!using_compacted_format())
			paranoid_xstate_size = xfeature_uncompacted_offset(i);
		/*
		 * The compacted-format offset always depends on where
		 * the previous state ended.
		 */
		paranoid_xstate_size += xfeature_size(i);
	}
	XSTATE_WARN_ON(paranoid_xstate_size != xstate_size);
}
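
/*
 * Sanity-check arithmetic (illustrative): with FP, SSE and YMM
 * enabled and the standard format in use, YMM is the last feature,
 * at uncompacted offset 576 with size 256, so paranoid_xstate_size
 * ends up as 576 + 256 = 832 - which must match the CPUID-provided
 * xstate_size computed below.
 */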

/*
 * Calculate total size of enabled xstates in XCR0/xfeatures_mask.
 *
 * Note the SDM's wording here. "sub-function 0" only enumerates
 * the size of the *user* states. If we use it to size a buffer
 * that we use 'XSAVES' on, we could potentially overflow the
 * buffer because 'XSAVES' saves system states too.
 *
 * Note that we do not currently set any bits on IA32_XSS so
 * 'XCR0 | IA32_XSS == XCR0' for now.
 */
static unsigned int __init calculate_xstate_size(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int calculated_xstate_size;

	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
		/*
		 * - CPUID function 0DH, sub-function 0:
		 *    EBX enumerates the size (in bytes) required by
		 *    the XSAVE instruction for an XSAVE area
		 *    containing all the *user* state components
		 *    corresponding to bits currently set in XCR0.
		 */
		cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
		calculated_xstate_size = ebx;
	} else {
		/*
		 * - CPUID function 0DH, sub-function 1:
		 *    EBX enumerates the size (in bytes) required by
		 *    the XSAVES instruction for an XSAVE area
		 *    containing all the state components
		 *    corresponding to bits currently set in
		 *    XCR0 | IA32_XSS.
		 */
		cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
		calculated_xstate_size = ebx;
	}
	return calculated_xstate_size;
}

/*
 * Will the runtime-enumerated 'xstate_size' fit in the init
 * task's statically-allocated buffer?
 */
static bool is_supported_xstate_size(unsigned int test_xstate_size)
{
	if (test_xstate_size <= sizeof(union fpregs_state))
		return true;

	pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n",
		sizeof(union fpregs_state), test_xstate_size);
	return false;
}

static int init_xstate_size(void)
{
	/* Recompute the context size for enabled features: */
	unsigned int possible_xstate_size = calculate_xstate_size();

	/* Ensure we have the space to store all enabled features: */
	if (!is_supported_xstate_size(possible_xstate_size))
		return -EINVAL;

	/*
	 * The size is OK, we are definitely going to use xsave,
	 * make it known to the world that we need more space.
	 */
	xstate_size = possible_xstate_size;
	do_extra_xstate_size_checks();
	return 0;
}

/*
 * We enabled the XSAVE hardware, but something went wrong and
 * we can not use it. Disable it.
 */
static void fpu__init_disable_system_xstate(void)
{
	xfeatures_mask = 0;
	cr4_clear_bits(X86_CR4_OSXSAVE);
	fpu__xstate_clear_all_cpu_caps();
}

/*
 * Enable and initialize the xsave feature.
 * Called once per system bootup.
 */
void __init fpu__init_system_xstate(void)
{
	unsigned int eax, ebx, ecx, edx;
	static int on_boot_cpu __initdata = 1;
	int err;

	WARN_ON_FPU(!on_boot_cpu);
	on_boot_cpu = 0;

	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
		pr_info("x86/fpu: Legacy x87 FPU detected.\n");
		return;
	}

	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
		WARN_ON_FPU(1);
		return;
	}

	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	xfeatures_mask = eax + ((u64)edx << 32);

	if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
		BUG();
	}

	xfeatures_mask &= fpu__get_supported_xfeatures_mask();

	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();
	err = init_xstate_size();
	if (err) {
		/* something went wrong, boot without any XSAVE support */
		fpu__init_disable_system_xstate();
		return;
	}

	update_regset_xstate_info(xstate_size, xfeatures_mask);
	fpu__init_prepare_fx_sw_frame();
	setup_init_fpu_buf();
	setup_xstate_comp();

	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
		xfeatures_mask,
		xstate_size,
		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
}

/*
 * Restore minimal FPU state after suspend:
 */
void fpu__resume_cpu(void)
{
	/*
	 * Restore XCR0 on xsave capable CPUs:
	 */
	if (boot_cpu_has(X86_FEATURE_XSAVE))
		xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
}

/*
 * Given an xstate feature mask, calculate where in the xsave
 * buffer the state is. Callers should ensure that the buffer
 * is valid.
 *
 * Note: does not work for compacted buffers.
 */
void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask)
{
	int feature_nr = fls64(xstate_feature_mask) - 1;

	return (void *)xsave + xstate_comp_offsets[feature_nr];
}
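
/*
 * Index math, for example: XFEATURE_MASK_PKRU is (1 << 9), so
 * fls64() returns 10 and feature_nr is 9, i.e. the PKRU slot in
 * xstate_comp_offsets[].
 */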

/*
 * Given the xsave area and a state inside, this function returns the
 * address of the state.
 *
 * This is the API that is called to get xstate address in either
 * standard format or compacted format of xsave area.
 *
 * Note that if there is no data for the field in the xsave buffer
 * this will return NULL.
 *
 * Inputs:
 *	xstate: the thread's storage area for all FPU data
 *	xstate_feature: state which is defined in xsave.h (e.g.
 *	XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...)
 * Output:
 *	address of the state in the xsave area, or NULL if the
 *	field is not present in the xsave buffer.
 */
void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
{
	/*
	 * Do we even *have* xsave state?
	 */
	if (!boot_cpu_has(X86_FEATURE_XSAVE))
		return NULL;

	/*
	 * We should not ever be requesting features that we
	 * have not enabled. Remember that xfeatures_mask is
	 * what we write to the XCR0 register.
	 */
	WARN_ONCE(!(xfeatures_mask & xstate_feature),
		  "get of unsupported state");
	/*
	 * This assumes the last 'xsave*' instruction to
	 * have requested that 'xstate_feature' be saved.
	 * If it did not, we might be seeing an old value
	 * of the field in the buffer.
	 *
	 * This can happen because the last 'xsave' did not
	 * request that this feature be saved (unlikely)
	 * or because the "init optimization" caused it
	 * to not be saved.
	 */
	if (!(xsave->header.xfeatures & xstate_feature))
		return NULL;

	return __raw_xsave_addr(xsave, xstate_feature);
}
EXPORT_SYMBOL_GPL(get_xsave_addr);

/*
 * This wraps up the common operations that need to occur when retrieving
 * data from xsave state. It first ensures that the current task was
 * using the FPU and retrieves the data into a buffer. It then calculates
 * the offset of the requested field in the buffer.
 *
 * This function is safe to call whether the FPU is in use or not.
 *
 * Note that this only works on the current task.
 *
 * Inputs:
 *	@xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
 *	XFEATURE_MASK_SSE, etc...)
 * Output:
 *	address of the state in the xsave area or NULL if the state
 *	is not present or is in its 'init state'.
 */
const void *get_xsave_field_ptr(int xsave_state)
{
	struct fpu *fpu = &current->thread.fpu;

	if (!fpu->fpstate_active)
		return NULL;
	/*
	 * fpu__save() takes the CPU's xstate registers
	 * and saves them off to the 'fpu' memory buffer.
	 */
	fpu__save(fpu);

	return get_xsave_addr(&fpu->state.xsave, xsave_state);
}
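
/*
 * Typical use (sketch, modelled on the MPX code): fetch the current
 * task's BNDCSR, treating NULL as "the register is in its init
 * state":
 *
 *	const struct mpx_bndcsr *bndcsr;
 *
 *	bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
 *	if (!bndcsr)
 *		return -EINVAL;
 */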

/*
 * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
 * to take out of its "init state". This will ensure that an
 * XRSTOR actually restores the state.
 */
static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
		int xstate_feature_mask)
{
	xsave->header.xfeatures |= xstate_feature_mask;
}

/*
 * This function is safe to call whether the FPU is in use or not.
 *
 * Note that this only works on the current task.
 *
 * Inputs:
 *	@xstate_feature_mask: state which is defined in xsave.h (e.g.
 *	XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...)
 *	@xstate_feature_src: a pointer to a copy of the state that you would
 *	like written in to the current task's FPU xsave state. This pointer
 *	must not be located in the current task's xsave area.
 */
static void fpu__xfeature_set_state(int xstate_feature_mask,
		void *xstate_feature_src, size_t len)
{
	struct xregs_state *xsave = &current->thread.fpu.state.xsave;
	struct fpu *fpu = &current->thread.fpu;
	void *dst;

	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
		WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
		return;
	}

	/*
	 * Tell the FPU code that we need the FPU state to be in
	 * 'fpu' (not in the registers), and that we need it to
	 * be stable while we write to it.
	 */
	fpu__current_fpstate_write_begin();

	/*
	 * This method *WILL* *NOT* work for compact-format
	 * buffers. If the 'xstate_feature_mask' is unset in
	 * xcomp_bv then we may need to move other feature state
	 * "up" in the buffer.
	 */
	if (xsave->header.xcomp_bv & xstate_feature_mask) {
		WARN_ON_ONCE(1);
		goto out;
	}

	/* find the location in the xsave buffer of the desired state */
	dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);

	/*
	 * Make sure that the pointer being passed in did not
	 * come from the xsave buffer itself.
	 */
	WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");

	/* put the caller-provided data in the location */
	memcpy(dst, xstate_feature_src, len);

	/*
	 * Mark the xfeature so that the CPU knows there is state
	 * in the buffer now.
	 */
	fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
out:
	/*
	 * We are done writing to the 'fpu'. Re-enable preemption
	 * and (possibly) move the fpstate back in to the fpregs.
	 */
	fpu__current_fpstate_write_end();
}

#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
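
/*
 * PKRU packs two bits per protection key into a 32-bit register
 * (so 16 keys): for a given pkey, bit 2*pkey is Access-Disable
 * (PKRU_AD_BIT) and bit 2*pkey+1 is Write-Disable (PKRU_WD_BIT).
 * For example, PKEY_DISABLE_WRITE on pkey 1 ends up setting bit 3.
 */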

/*
 * This will go out and modify the XSAVE buffer so that PKRU is
 * set to a particular state for access to 'pkey'.
 *
 * PKRU state does affect kernel access to user memory. We do
 * not modify PKRU *itself* here, only the XSAVE state that will
 * be restored in to PKRU when we return back to userspace.
 */
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
		unsigned long init_val)
{
	struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
	struct pkru_state *old_pkru_state;
	struct pkru_state new_pkru_state;
	int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
	u32 new_pkru_bits = 0;

	/*
	 * This check implies XSAVE support. OSPKE only gets
	 * set if we enable XSAVE and we enable PKU in XCR0.
	 */
	if (!boot_cpu_has(X86_FEATURE_OSPKE))
		return -EINVAL;

	/* Set the bits we need in PKRU: */
	if (init_val & PKEY_DISABLE_ACCESS)
		new_pkru_bits |= PKRU_AD_BIT;
	if (init_val & PKEY_DISABLE_WRITE)
		new_pkru_bits |= PKRU_WD_BIT;

	/* Shift the bits in to the correct place in PKRU for pkey: */
	new_pkru_bits <<= pkey_shift;

	/* Locate old copy of the state in the xsave buffer: */
	old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);

	/*
	 * When state is not in the buffer, it is in the init
	 * state, set it manually. Otherwise, copy out the old
	 * state.
	 */
	if (!old_pkru_state)
		new_pkru_state.pkru = 0;
	else
		new_pkru_state.pkru = old_pkru_state->pkru;

	/* Mask off any old bits in place: */
	new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);

	/* Set the newly-requested bits: */
	new_pkru_state.pkru |= new_pkru_bits;

	/*
	 * We could theoretically live without zeroing pkru.pad.
	 * The current XSAVE feature state definition says that
	 * only bytes 0->3 are used. But we do not want to
	 * chance leaking kernel stack out to userspace in case a
	 * memcpy() of the whole xsave buffer was done.
	 *
	 * They're in the same cacheline anyway.
	 */
	new_pkru_state.pad = 0;

	fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state,
			sizeof(new_pkru_state));

	return 0;
}