// SPDX-License-Identifier: GPL-2.0
/*
 * AMD Encrypted Register State Support
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 *
 * This file is not compiled stand-alone. It contains code shared
 * between the pre-decompression boot code and the running Linux kernel
 * and is included directly into both code-bases.
 */

#include <asm/setup_data.h>

#ifndef __BOOT_COMPRESSED
#define error(v)		pr_err(v)
#define has_cpuflag(f)		boot_cpu_has(f)
#else
#undef WARN
#define WARN(condition, format...) (!!(condition))
#undef vc_forward_exception
#define vc_forward_exception(c)	panic("SNP: Hypervisor requested exception\n")
#endif

/*
 * SVSM related information:
 * During boot, the page tables are set up as identity mapped and later
 * changed to use kernel virtual addresses. Maintain separate virtual and
 * physical addresses for the CAA to allow SVSM functions to be used during
 * early boot, both with identity mapped virtual addresses and proper kernel
 * virtual addresses.
 */
struct svsm_ca *boot_svsm_caa __ro_after_init;
u64 boot_svsm_caa_pa __ro_after_init;

/*
 * Since feature negotiation related variables are set early in the boot
 * process they must reside in the .data section so as not to be zeroed
 * out when the .bss section is later cleared.
 *
 * GHCB protocol version negotiated with the hypervisor.
 */
static u16 ghcb_version __ro_after_init;

/* Copy of the SNP firmware's CPUID page. */
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;

/*
 * These will be initialized based on CPUID table so that non-present
 * all-zero leaves (for sparse tables) can be differentiated from
 * invalid/out-of-range leaves. This is needed since all-zero leaves
 * still need to be post-processed.
 */
static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;

bool __init sev_es_check_cpu_features(void)
{
	if (!has_cpuflag(X86_FEATURE_RDRAND)) {
		error("RDRAND instruction not supported - no trusted source of randomness available\n");
		return false;
	}

	return true;
}

void __head __noreturn
sev_es_terminate(unsigned int set, unsigned int reason)
{
	u64 val = GHCB_MSR_TERM_REQ;

	/* Tell the hypervisor what went wrong. */
	val |= GHCB_SEV_TERM_REASON(set, reason);

	/* Request Guest Termination from Hypervisor */
	sev_es_wr_ghcb_msr(val);
	VMGEXIT();

	while (true)
		asm volatile("hlt\n" : : : "memory");
}
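
/*
 * The MSR-based GHCB protocol exchanges in this file all follow the same
 * shape: write a request value to the GHCB MSR, issue VMGEXIT, read the
 * response back from the GHCB MSR and validate its response code before
 * using the payload. A minimal sketch of the pattern, using the SEV
 * information request as an example (illustrative only):
 *
 *	sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
 *	VMGEXIT();
 *	val = sev_es_rd_ghcb_msr();
 *	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
 *		return false;
 */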

/*
 * The hypervisor features are available from GHCB version 2 onward.
 */
u64 get_hv_features(void)
{
	u64 val;

	if (ghcb_version < 2)
		return 0;

	sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP)
		return 0;

	return GHCB_MSR_HV_FT_RESP_VAL(val);
}

void snp_register_ghcb_early(unsigned long paddr)
{
	unsigned long pfn = paddr >> PAGE_SHIFT;
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();

	/* If the response GPA is not ours then abort the guest */
	if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
	    (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
}

bool sev_es_negotiate_protocol(void)
{
	u64 val;

	/* Do the GHCB protocol version negotiation */
	sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();

	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
		return false;

	if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
	    GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
		return false;

	ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);

	return true;
}

static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	u32 ret;

	ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
	if (!ret)
		return ES_OK;

	if (ret == 1) {
		u64 info = ghcb->save.sw_exit_info_2;
		unsigned long v = info & SVM_EVTINJ_VEC_MASK;

		/* Check if exception information from hypervisor is sane. */
		if ((info & SVM_EVTINJ_VALID) &&
		    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
		    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
			ctxt->fi.vector = v;

			if (info & SVM_EVTINJ_VALID_ERR)
				ctxt->fi.error_code = info >> 32;

			return ES_EXCEPTION;
		}
	}

	return ES_VMM_ERROR;
}

static inline int svsm_process_result_codes(struct svsm_call *call)
{
	switch (call->rax_out) {
	case SVSM_SUCCESS:
		return 0;
	case SVSM_ERR_INCOMPLETE:
	case SVSM_ERR_BUSY:
		return -EAGAIN;
	default:
		return -EINVAL;
	}
}
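
/*
 * svsm_process_result_codes() maps SVSM_ERR_INCOMPLETE and SVSM_ERR_BUSY to
 * -EAGAIN, signalling that the request should simply be re-issued. A minimal
 * sketch of the retry pattern a caller might use (illustrative only):
 *
 *	do {
 *		ret = svsm_perform_msr_protocol(&call);
 *	} while (ret == -EAGAIN);
 */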

/*
 * Issue a VMGEXIT to call the SVSM:
 *   - Load the SVSM register state (RAX, RCX, RDX, R8 and R9)
 *   - Set the CA call pending field to 1
 *   - Issue VMGEXIT
 *   - Save the SVSM return register state (RAX, RCX, RDX, R8 and R9)
 *   - Perform atomic exchange of the CA call pending field
 *
 *   - See the "Secure VM Service Module for SEV-SNP Guests" specification for
 *     details on the calling convention.
 *     - The calling convention loosely follows the Microsoft X64 calling
 *       convention by putting arguments in RCX, RDX, R8 and R9.
 *     - RAX specifies the SVSM protocol/callid as input and the return code
 *       as output.
 */
static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending)
{
	register unsigned long rax asm("rax") = call->rax;
	register unsigned long rcx asm("rcx") = call->rcx;
	register unsigned long rdx asm("rdx") = call->rdx;
	register unsigned long r8 asm("r8") = call->r8;
	register unsigned long r9 asm("r9") = call->r9;

	call->caa->call_pending = 1;

	asm volatile("rep; vmmcall\n\t"
		     : "+r" (rax), "+r" (rcx), "+r" (rdx), "+r" (r8), "+r" (r9)
		     : : "memory");

	*pending = xchg(&call->caa->call_pending, *pending);

	call->rax_out = rax;
	call->rcx_out = rcx;
	call->rdx_out = rdx;
	call->r8_out = r8;
	call->r9_out = r9;
}

static int svsm_perform_msr_protocol(struct svsm_call *call)
{
	u8 pending = 0;
	u64 val, resp;

	/*
	 * When using the MSR protocol, be sure to save and restore
	 * the current MSR value.
	 */
	val = sev_es_rd_ghcb_msr();

	sev_es_wr_ghcb_msr(GHCB_MSR_VMPL_REQ_LEVEL(0));

	svsm_issue_call(call, &pending);

	resp = sev_es_rd_ghcb_msr();

	sev_es_wr_ghcb_msr(val);

	if (pending)
		return -EINVAL;

	if (GHCB_RESP_CODE(resp) != GHCB_MSR_VMPL_RESP)
		return -EINVAL;

	if (GHCB_MSR_VMPL_RESP_VAL(resp))
		return -EINVAL;

	return svsm_process_result_codes(call);
}

static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
{
	struct es_em_ctxt ctxt;
	u8 pending = 0;

	vc_ghcb_invalidate(ghcb);

	/*
	 * Fill in protocol and format specifiers. This can be called very early
	 * in the boot, so use rip-relative references as needed.
	 */
	ghcb->protocol_version = ghcb_version;
	ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa(ghcb));

	svsm_issue_call(call, &pending);

	if (pending)
		return -EINVAL;

	switch (verify_exception_info(ghcb, &ctxt)) {
	case ES_OK:
		break;
	case ES_EXCEPTION:
		vc_forward_exception(&ctxt);
		fallthrough;
	default:
		return -EINVAL;
	}

	return svsm_process_result_codes(call);
}

enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
				   struct es_em_ctxt *ctxt,
				   u64 exit_code, u64 exit_info_1,
				   u64 exit_info_2)
{
	/* Fill in protocol and format specifiers */
	ghcb->protocol_version = ghcb_version;
	ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, exit_code);
	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	return verify_exception_info(ghcb, ctxt);
}

static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
{
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
		return -EIO;

	*reg = (val >> 32);

	return 0;
}
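
/*
 * With the MSR-based CPUID protocol, each exchange above returns a single
 * register in the upper 32 bits of the GHCB MSR response, so fetching a full
 * leaf costs four VMGEXITs. A minimal sketch (illustrative only;
 * __sev_cpuid_hv_msr() below does exactly this while propagating errors):
 *
 *	__sev_cpuid_hv(fn, GHCB_CPUID_REQ_EAX, &eax);
 *	__sev_cpuid_hv(fn, GHCB_CPUID_REQ_EBX, &ebx);
 *	__sev_cpuid_hv(fn, GHCB_CPUID_REQ_ECX, &ecx);
 *	__sev_cpuid_hv(fn, GHCB_CPUID_REQ_EDX, &edx);
 */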

static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf)
{
	int ret;

	/*
	 * MSR protocol does not support fetching non-zero subfunctions, but is
	 * sufficient to handle current early-boot cases. Should that change,
	 * make sure to report an error rather than ignoring the index and
	 * grabbing random values. If this issue arises in the future, handling
	 * can be added here to use GHCB-page protocol for cases that occur late
	 * enough in boot that GHCB page is available.
	 */
	if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn)
		return -EINVAL;

	ret =         __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);

	return ret;
}

static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	u32 cr4 = native_read_cr4();
	int ret;

	ghcb_set_rax(ghcb, leaf->fn);
	ghcb_set_rcx(ghcb, leaf->subfn);

	if (cr4 & X86_CR4_OSXSAVE)
		/* Safe to read xcr0 */
		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
	else
		/* xgetbv will cause #UD - use reset value for xcr0 */
		ghcb_set_xcr0(ghcb, 1);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) &&
	      ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	leaf->eax = ghcb->save.rax;
	leaf->ebx = ghcb->save.rbx;
	leaf->ecx = ghcb->save.rcx;
	leaf->edx = ghcb->save.rdx;

	return ES_OK;
}

static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf)
		    : __sev_cpuid_hv_msr(leaf);
}

/*
 * This may be called early while still running on the initial identity
 * mapping. Use RIP-relative addressing to obtain the correct address
 * while running with the initial identity mapping as well as the
 * switch-over to kernel virtual addresses later.
 */
const struct snp_cpuid_table *snp_cpuid_get_table(void)
{
	return rip_rel_ptr(&cpuid_table_copy);
}
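
/*
 * The snp_cpuid_table returned above is a count plus a flat array of
 * snp_cpuid_fn entries, so every lookup in this file is a simple linear
 * scan keyed on eax_in (and ecx_in for indexed functions). A minimal
 * sketch of such a scan (illustrative only):
 *
 *	const struct snp_cpuid_table *tbl = snp_cpuid_get_table();
 *
 *	for (i = 0; i < tbl->count; i++) {
 *		const struct snp_cpuid_fn *fn = &tbl->fn[i];
 *
 *		if (fn->eax_in == leaf->fn)
 *			break;
 *	}
 */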

/*
 * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
 * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
 * and 1 based on the corresponding features enabled by a particular
 * combination of XCR0 and XSS registers so that a guest can look up the
 * version corresponding to the features currently enabled in its XCR0/XSS
 * registers. The only values that differ between these versions/table
 * entries are the enabled XSAVE area size advertised via EBX.
 *
 * While hypervisors may choose to make use of this support, it is more
 * robust/secure for a guest to simply find the entry corresponding to the
 * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
 * XSAVE area size using subfunctions 2 through 64, as documented in APM
 * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
 *
 * Since base/legacy XSAVE area size is documented as 0x240, use that value
 * directly rather than relying on the base size in the CPUID table.
 *
 * Return: XSAVE area size on success, 0 otherwise.
 */
static u32 __head snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	u64 xfeatures_found = 0;
	u32 xsave_size = 0x240;
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
			continue;
		if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
			continue;
		if (xfeatures_found & (BIT_ULL(e->ecx_in)))
			continue;

		xfeatures_found |= (BIT_ULL(e->ecx_in));

		if (compacted)
			xsave_size += e->eax;
		else
			xsave_size = max(xsave_size, e->eax + e->ebx);
	}

	/*
	 * Either the guest set unsupported XCR0/XSS bits, or the corresponding
	 * entries in the CPUID table were not present. This is not a valid
	 * state to be in.
	 */
	if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
		return 0;

	return xsave_size;
}

static bool __head
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (e->eax_in != leaf->fn)
			continue;

		if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
			continue;

		/*
		 * For 0xD subfunctions 0 and 1, only use the entry corresponding
		 * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
		 * See the comments above snp_cpuid_calc_xsave_size() for more
		 * details.
		 */
		if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
			if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
				continue;

		leaf->eax = e->eax;
		leaf->ebx = e->ebx;
		leaf->ecx = e->ecx;
		leaf->edx = e->edx;

		return true;
	}

	return false;
}

static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	if (sev_cpuid_hv(ghcb, ctxt, leaf))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}
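
/*
 * snp_cpuid_postprocess() below recomputes the 0xD:0x0/0x1 EBX value via
 * snp_cpuid_calc_xsave_size(). As a worked example (values illustrative):
 * with XCR0 = 7 (x87, SSE, AVX) and a table entry for 0xD subfunction 2
 * reporting EAX = 0x100 (size) and EBX = 0x240 (offset), the non-compacted
 * size is max(0x240, 0x240 + 0x100) = 0x340, and the compacted size is
 * 0x240 + 0x100 = 0x340.
 */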

static int __head
snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
		      struct cpuid_leaf *leaf)
{
	struct cpuid_leaf leaf_hv = *leaf;

	switch (leaf->fn) {
	case 0x1:
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* initial APIC ID */
		leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
		/* APIC enabled bit */
		leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));

		/* OSXSAVE enabled bit */
		if (native_read_cr4() & X86_CR4_OSXSAVE)
			leaf->ecx |= BIT(27);
		break;
	case 0x7:
		/* OSPKE enabled bit */
		leaf->ecx &= ~BIT(4);
		if (native_read_cr4() & X86_CR4_PKE)
			leaf->ecx |= BIT(4);
		break;
	case 0xB:
		leaf_hv.subfn = 0;
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* extended APIC ID */
		leaf->edx = leaf_hv.edx;
		break;
	case 0xD: {
		bool compacted = false;
		u64 xcr0 = 1, xss = 0;
		u32 xsave_size;

		if (leaf->subfn != 0 && leaf->subfn != 1)
			return 0;

		if (native_read_cr4() & X86_CR4_OSXSAVE)
			xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
		if (leaf->subfn == 1) {
			/* Get XSS value if XSAVES is enabled. */
			if (leaf->eax & BIT(3)) {
				unsigned long lo, hi;

				asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
						     : "c" (MSR_IA32_XSS));
				xss = (hi << 32) | lo;
			}

			/*
			 * The PPR and APM aren't clear on what size should be
			 * encoded in 0xD:0x1:EBX when compaction is not enabled
			 * by either XSAVEC (feature bit 1) or XSAVES (feature
			 * bit 3) since SNP-capable hardware has these feature
			 * bits fixed as 1. KVM sets it to 0 in this case, but
			 * to avoid this becoming an issue it's safer to simply
			 * treat this as unsupported for SNP guests.
			 */
			if (!(leaf->eax & (BIT(1) | BIT(3))))
				return -EINVAL;

			compacted = true;
		}

		xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
		if (!xsave_size)
			return -EINVAL;

		leaf->ebx = xsave_size;
		}
		break;
	case 0x8000001E:
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* extended APIC ID */
		leaf->eax = leaf_hv.eax;
		/* compute ID */
		leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
		/* node ID */
		leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
		break;
	default:
		/* No fix-ups needed, use values as-is. */
		break;
	}

	return 0;
}

/*
 * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
 * should be treated as fatal by caller.
 */
int __head
snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	if (!cpuid_table->count)
		return -EOPNOTSUPP;

	if (!snp_cpuid_get_validated_func(leaf)) {
		/*
		 * Some hypervisors will avoid keeping track of CPUID entries
		 * where all values are zero, since they can be handled the
		 * same as out-of-range values (all-zero). This is useful here
		 * as well as it allows virtually all guest configurations to
		 * work using a single SNP CPUID table.
		 *
		 * To allow for this, there is a need to distinguish between
		 * out-of-range entries and in-range zero entries, since the
		 * CPUID table entries are only a template that may need to be
		 * augmented with additional values for things like
		 * CPU-specific information during post-processing. So if it's
		 * not in the table, set the values to zero. Then, if they are
		 * within a valid CPUID range, proceed with post-processing
		 * using zeros as the initial values. Otherwise, skip
		 * post-processing and just return zeros immediately.
		 */
		leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;

		/* Skip post-processing for out-of-range zero leafs. */
		if (!(leaf->fn <= cpuid_std_range_max ||
		      (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
		      (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
			return 0;
	}

	return snp_cpuid_postprocess(ghcb, ctxt, leaf);
}
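
/*
 * The distinction snp_cpuid() draws above matters for sparse tables. For
 * example (leaf numbers illustrative): a request for leaf 0x2 that is absent
 * from the table but within cpuid_std_range_max still goes through
 * snp_cpuid_postprocess() with all-zero initial values, whereas a request
 * for leaf 0x4F000000, which falls outside every known range, returns zeros
 * immediately.
 */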

/*
 * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
 * page yet, so it only supports the MSR based communication with the
 * hypervisor and only the CPUID exit-code.
 */
void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
	unsigned int subfn = lower_bits(regs->cx, 32);
	unsigned int fn = lower_bits(regs->ax, 32);
	u16 opcode = *(unsigned short *)regs->ip;
	struct cpuid_leaf leaf;
	int ret;

	/* Only CPUID is supported via MSR protocol */
	if (exit_code != SVM_EXIT_CPUID)
		goto fail;

	/* Is it really a CPUID insn? */
	if (opcode != 0xa20f)
		goto fail;

	leaf.fn = fn;
	leaf.subfn = subfn;

	ret = snp_cpuid(NULL, NULL, &leaf);
	if (!ret)
		goto cpuid_done;

	if (ret != -EOPNOTSUPP)
		goto fail;

	if (__sev_cpuid_hv_msr(&leaf))
		goto fail;

cpuid_done:
	regs->ax = leaf.eax;
	regs->bx = leaf.ebx;
	regs->cx = leaf.ecx;
	regs->dx = leaf.edx;

	/*
	 * This is a VC handler and the #VC is only raised when SEV-ES is
	 * active, which means SEV must be active too. Do sanity checks on the
	 * CPUID results to make sure the hypervisor does not trick the kernel
	 * into the no-sev path. This could map sensitive data unencrypted and
	 * make it accessible to the hypervisor.
	 *
	 * In particular, check for:
	 *	- Availability of CPUID leaf 0x8000001f
	 *	- SEV CPUID bit.
	 *
	 * The hypervisor might still report the wrong C-bit position, but this
	 * can't be checked here.
	 */

	if (fn == 0x80000000 && (regs->ax < 0x8000001f))
		/* SEV leaf check */
		goto fail;
	else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
		/* SEV bit */
		goto fail;

	/* Skip over the CPUID two-byte opcode */
	regs->ip += 2;

	return;

fail:
	/* Terminate the guest */
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}

struct cc_setup_data {
	struct setup_data header;
	u32 cc_blob_address;
};

/*
 * Search for a Confidential Computing blob passed in as a setup_data entry
 * via the Linux Boot Protocol.
 */
static __head
struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
	struct cc_setup_data *sd = NULL;
	struct setup_data *hdr;

	hdr = (struct setup_data *)bp->hdr.setup_data;

	while (hdr) {
		if (hdr->type == SETUP_CC_BLOB) {
			sd = (struct cc_setup_data *)hdr;
			return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
		}
		hdr = (struct setup_data *)hdr->next;
	}

	return NULL;
}

/*
 * Initialize the kernel's copy of the SNP CPUID table, and set up the
 * pointer that will be used to access it.
 *
 * Maintaining a direct mapping of the SNP CPUID table used by firmware would
 * be possible as an alternative, but the approach is brittle since the
 * mapping needs to be updated in sync with all the changes to virtual memory
 * layout and related mapping facilities throughout the boot process.
 */
static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
	const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
	int i;

	if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys;
	if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table = snp_cpuid_get_table();
	memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table));

	/* Initialize CPUID ranges for range-checking. */
	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		if (fn->eax_in == 0x0)
			cpuid_std_range_max = fn->eax;
		else if (fn->eax_in == 0x40000000)
			cpuid_hyp_range_max = fn->eax;
		else if (fn->eax_in == 0x80000000)
			cpuid_ext_range_max = fn->eax;
	}
}

static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
{
	struct svsm_pvalidate_call *pc;
	struct svsm_call call = {};
	unsigned long flags;
	u64 pc_pa;
	int ret;

	/*
	 * This can be called very early in the boot, use native functions in
	 * order to avoid paravirt issues.
	 */
	flags = native_local_irq_save();

	call.caa = svsm_get_caa();

	pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
	pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);

	pc->num_entries = 1;
	pc->cur_index = 0;
	pc->entry[0].page_size = RMP_PG_SIZE_4K;
	pc->entry[0].action = validate;
	pc->entry[0].ignore_cf = 0;
	pc->entry[0].rsvd = 0;
	pc->entry[0].pfn = paddr >> PAGE_SHIFT;

	/* Protocol 0, Call ID 1 */
	call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
	call.rcx = pc_pa;

	ret = svsm_perform_call_protocol(&call);
	if (ret)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);

	native_local_irq_restore(flags);
}

static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
				     bool validate)
{
	int ret;

	if (snp_vmpl) {
		svsm_pval_4k_page(paddr, validate);
	} else {
		ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
		if (ret)
			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
	}

	/*
	 * If validating memory (making it private) and affected by the
	 * cache-coherency vulnerability, perform the cache eviction mitigation.
	 */
	if (validate && !has_cpuflag(X86_FEATURE_COHERENCY_SFW_NO))
		sev_evict_cache((void *)vaddr, 1);
}
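
/*
 * pvalidate_4k_page() above handles exactly one 4K page, so callers that
 * need to change the validation state of a larger region walk it page by
 * page. A minimal sketch (illustrative only):
 *
 *	for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, paddr += PAGE_SIZE)
 *		pvalidate_4k_page(vaddr, paddr, validate);
 */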

/*
 * Maintain the GPA of the SVSM Calling Area (CA) in order to utilize the SVSM
 * services needed when not running in VMPL0.
 */
static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
{
	struct snp_secrets_page *secrets_page;
	struct snp_cpuid_table *cpuid_table;
	unsigned int i;
	u64 caa;

	BUILD_BUG_ON(sizeof(*secrets_page) != PAGE_SIZE);

	/*
	 * Check if running at VMPL0.
	 *
	 * Use RMPADJUST (see the rmpadjust() function for a description of what
	 * the instruction does) to update the VMPL1 permissions of a page. If
	 * the guest is running at VMPL0, this will succeed and implies there is
	 * no SVSM. If the guest is running at any other VMPL, this will fail.
	 * Linux SNP guests only ever run at a single VMPL level so permission mask
	 * changes of a lesser-privileged VMPL are a don't-care.
	 *
	 * Use a rip-relative reference to obtain the proper address, since this
	 * routine is running identity mapped when called, both by the decompressor
	 * code and the early kernel code.
	 */
	if (!rmpadjust((unsigned long)rip_rel_ptr(&boot_ghcb_page), RMP_PG_SIZE_4K, 1))
		return false;

	/*
	 * Not running at VMPL0, ensure everything has been properly supplied
	 * for running under an SVSM.
	 */
	if (!cc_info || !cc_info->secrets_phys || cc_info->secrets_len != PAGE_SIZE)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECRETS_PAGE);

	secrets_page = (struct snp_secrets_page *)cc_info->secrets_phys;
	if (!secrets_page->svsm_size)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NO_SVSM);

	if (!secrets_page->svsm_guest_vmpl)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_VMPL0);

	snp_vmpl = secrets_page->svsm_guest_vmpl;

	caa = secrets_page->svsm_caa;

	/*
	 * An open-coded PAGE_ALIGNED() in order to avoid including
	 * kernel-proper headers into the decompressor.
	 */
	if (caa & (PAGE_SIZE - 1))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CAA);

	/*
	 * The CA is identity mapped when this routine is called, both by the
	 * decompressor code and the early kernel code.
	 */
	boot_svsm_caa = (struct svsm_ca *)caa;
	boot_svsm_caa_pa = caa;

	/* Advertise the SVSM presence via CPUID. */
	cpuid_table = (struct snp_cpuid_table *)snp_cpuid_get_table();
	for (i = 0; i < cpuid_table->count; i++) {
		struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		if (fn->eax_in == 0x8000001f)
			fn->eax |= BIT(28);
	}

	return true;
}