1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * AMD Memory Encryption Support 4 * 5 * Copyright (C) 2019 SUSE 6 * 7 * Author: Joerg Roedel <jroedel@suse.de> 8 */ 9 10 #define pr_fmt(fmt) "SEV: " fmt 11 12 #include <linux/sched/debug.h> /* For show_regs() */ 13 #include <linux/percpu-defs.h> 14 #include <linux/cc_platform.h> 15 #include <linux/printk.h> 16 #include <linux/mm_types.h> 17 #include <linux/set_memory.h> 18 #include <linux/memblock.h> 19 #include <linux/kernel.h> 20 #include <linux/mm.h> 21 #include <linux/cpumask.h> 22 #include <linux/efi.h> 23 #include <linux/platform_device.h> 24 #include <linux/io.h> 25 #include <linux/psp-sev.h> 26 #include <linux/dmi.h> 27 #include <uapi/linux/sev-guest.h> 28 #include <crypto/gcm.h> 29 30 #include <asm/init.h> 31 #include <asm/cpu_entry_area.h> 32 #include <asm/stacktrace.h> 33 #include <asm/sev.h> 34 #include <asm/insn-eval.h> 35 #include <asm/fpu/xcr.h> 36 #include <asm/processor.h> 37 #include <asm/realmode.h> 38 #include <asm/setup.h> 39 #include <asm/traps.h> 40 #include <asm/svm.h> 41 #include <asm/smp.h> 42 #include <asm/cpu.h> 43 #include <asm/apic.h> 44 #include <asm/cpuid/api.h> 45 #include <asm/cmdline.h> 46 #include <asm/msr.h> 47 48 #include "internal.h" 49 50 /* Bitmap of SEV features supported by the hypervisor */ 51 u64 sev_hv_features __ro_after_init; 52 SYM_PIC_ALIAS(sev_hv_features); 53 54 /* Secrets page physical address from the CC blob */ 55 u64 sev_secrets_pa __ro_after_init; 56 SYM_PIC_ALIAS(sev_secrets_pa); 57 58 /* AP INIT values as documented in the APM2 section "Processor Initialization State" */ 59 #define AP_INIT_CS_LIMIT 0xffff 60 #define AP_INIT_DS_LIMIT 0xffff 61 #define AP_INIT_LDTR_LIMIT 0xffff 62 #define AP_INIT_GDTR_LIMIT 0xffff 63 #define AP_INIT_IDTR_LIMIT 0xffff 64 #define AP_INIT_TR_LIMIT 0xffff 65 #define AP_INIT_RFLAGS_DEFAULT 0x2 66 #define AP_INIT_DR6_DEFAULT 0xffff0ff0 67 #define AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL 68 #define AP_INIT_XCR0_DEFAULT 0x1 69 #define AP_INIT_X87_FTW_DEFAULT 0x5555 70 #define AP_INIT_X87_FCW_DEFAULT 0x0040 71 #define AP_INIT_CR0_DEFAULT 0x60000010 72 #define AP_INIT_MXCSR_DEFAULT 0x1f80 73 74 static const char * const sev_status_feat_names[] = { 75 [MSR_AMD64_SEV_ENABLED_BIT] = "SEV", 76 [MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES", 77 [MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP", 78 [MSR_AMD64_SNP_VTOM_BIT] = "vTom", 79 [MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC", 80 [MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI", 81 [MSR_AMD64_SNP_ALT_INJ_BIT] = "AI", 82 [MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap", 83 [MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS", 84 [MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol", 85 [MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS", 86 [MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC", 87 [MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam", 88 [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt", 89 [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt", 90 [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt", 91 [MSR_AMD64_SNP_SECURE_AVIC_BIT] = "SecureAVIC", 92 }; 93 94 /* 95 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and 96 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated 97 * across the APs VMSA fields (TSC_SCALE and TSC_OFFSET). 
98 */ 99 static u64 snp_tsc_scale __ro_after_init; 100 static u64 snp_tsc_offset __ro_after_init; 101 static unsigned long snp_tsc_freq_khz __ro_after_init; 102 103 DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); 104 DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); 105 106 /* 107 * SVSM related information: 108 * When running under an SVSM, the VMPL that Linux is executing at must be 109 * non-zero. The VMPL is therefore used to indicate the presence of an SVSM. 110 */ 111 u8 snp_vmpl __ro_after_init; 112 EXPORT_SYMBOL_GPL(snp_vmpl); 113 SYM_PIC_ALIAS(snp_vmpl); 114 115 /* 116 * Since feature negotiation related variables are set early in the boot 117 * process they must reside in the .data section so as not to be zeroed 118 * out when the .bss section is later cleared. 119 * 120 * GHCB protocol version negotiated with the hypervisor. 121 */ 122 u16 ghcb_version __ro_after_init; 123 SYM_PIC_ALIAS(ghcb_version); 124 125 /* For early boot hypervisor communication in SEV-ES enabled guests */ 126 static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); 127 128 /* 129 * Needs to be in the .data section because we need it NULL before bss is 130 * cleared 131 */ 132 struct ghcb *boot_ghcb __section(".data"); 133 134 static u64 __init get_snp_jump_table_addr(void) 135 { 136 struct snp_secrets_page *secrets; 137 void __iomem *mem; 138 u64 addr; 139 140 mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE); 141 if (!mem) { 142 pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n"); 143 return 0; 144 } 145 146 secrets = (__force struct snp_secrets_page *)mem; 147 148 addr = secrets->os_area.ap_jump_table_pa; 149 iounmap(mem); 150 151 return addr; 152 } 153 154 static u64 __init get_jump_table_addr(void) 155 { 156 struct ghcb_state state; 157 unsigned long flags; 158 struct ghcb *ghcb; 159 u64 ret = 0; 160 161 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 162 return get_snp_jump_table_addr(); 163 164 local_irq_save(flags); 165 166 ghcb = __sev_get_ghcb(&state); 167 168 vc_ghcb_invalidate(ghcb); 169 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE); 170 ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE); 171 ghcb_set_sw_exit_info_2(ghcb, 0); 172 173 sev_es_wr_ghcb_msr(__pa(ghcb)); 174 VMGEXIT(); 175 176 if (ghcb_sw_exit_info_1_is_valid(ghcb) && 177 ghcb_sw_exit_info_2_is_valid(ghcb)) 178 ret = ghcb->save.sw_exit_info_2; 179 180 __sev_put_ghcb(&state); 181 182 local_irq_restore(flags); 183 184 return ret; 185 } 186 187 static void pval_pages(struct snp_psc_desc *desc) 188 { 189 struct psc_entry *e; 190 unsigned long vaddr; 191 unsigned int size; 192 unsigned int i; 193 bool validate; 194 u64 pfn; 195 int rc; 196 197 for (i = 0; i <= desc->hdr.end_entry; i++) { 198 e = &desc->entries[i]; 199 200 pfn = e->gfn; 201 vaddr = (unsigned long)pfn_to_kaddr(pfn); 202 size = e->pagesize ? 
RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; 203 validate = e->operation == SNP_PAGE_STATE_PRIVATE; 204 205 rc = pvalidate(vaddr, size, validate); 206 if (!rc) 207 continue; 208 209 if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) { 210 unsigned long vaddr_end = vaddr + PMD_SIZE; 211 212 for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) { 213 rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); 214 if (rc) 215 __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0); 216 } 217 } else { 218 __pval_terminate(pfn, validate, size, rc, 0); 219 } 220 } 221 } 222 223 static void pvalidate_pages(struct snp_psc_desc *desc) 224 { 225 struct psc_entry *e; 226 unsigned int i; 227 228 if (snp_vmpl) 229 svsm_pval_pages(desc); 230 else 231 pval_pages(desc); 232 233 /* 234 * If not affected by the cache-coherency vulnerability there is no need 235 * to perform the cache eviction mitigation. 236 */ 237 if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO)) 238 return; 239 240 for (i = 0; i <= desc->hdr.end_entry; i++) { 241 e = &desc->entries[i]; 242 243 /* 244 * If validating memory (making it private) perform the cache 245 * eviction mitigation. 246 */ 247 if (e->operation == SNP_PAGE_STATE_PRIVATE) 248 sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 512 : 1); 249 } 250 } 251 252 static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) 253 { 254 int cur_entry, end_entry, ret = 0; 255 struct snp_psc_desc *data; 256 struct es_em_ctxt ctxt; 257 258 vc_ghcb_invalidate(ghcb); 259 260 /* Copy the input desc into GHCB shared buffer */ 261 data = (struct snp_psc_desc *)ghcb->shared_buffer; 262 memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc))); 263 264 /* 265 * As per the GHCB specification, the hypervisor can resume the guest 266 * before processing all the entries. Check whether all the entries 267 * are processed. If not, then keep retrying. Note, the hypervisor 268 * will update the data memory directly to indicate the status, so 269 * reference the data->hdr everywhere. 270 * 271 * The strategy here is to wait for the hypervisor to change the page 272 * state in the RMP table before guest accesses the memory pages. If the 273 * page state change was not successful, then later memory access will 274 * result in a crash. 275 */ 276 cur_entry = data->hdr.cur_entry; 277 end_entry = data->hdr.end_entry; 278 279 while (data->hdr.cur_entry <= data->hdr.end_entry) { 280 ghcb_set_sw_scratch(ghcb, (u64)__pa(data)); 281 282 /* This will advance the shared buffer data points to. */ 283 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0); 284 285 /* 286 * Page State Change VMGEXIT can pass error code through 287 * exit_info_2. 288 */ 289 if (WARN(ret || ghcb->save.sw_exit_info_2, 290 "SNP: PSC failed ret=%d exit_info_2=%llx\n", 291 ret, ghcb->save.sw_exit_info_2)) { 292 ret = 1; 293 goto out; 294 } 295 296 /* Verify that reserved bit is not set */ 297 if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) { 298 ret = 1; 299 goto out; 300 } 301 302 /* 303 * Sanity check that entry processing is not going backwards. 304 * This will happen only if hypervisor is tricking us. 
305 */ 306 if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry, 307 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n", 308 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) { 309 ret = 1; 310 goto out; 311 } 312 } 313 314 out: 315 return ret; 316 } 317 318 static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, 319 unsigned long vaddr_end, int op) 320 { 321 struct ghcb_state state; 322 bool use_large_entry; 323 struct psc_hdr *hdr; 324 struct psc_entry *e; 325 unsigned long flags; 326 unsigned long pfn; 327 struct ghcb *ghcb; 328 int i; 329 330 hdr = &data->hdr; 331 e = data->entries; 332 333 memset(data, 0, sizeof(*data)); 334 i = 0; 335 336 while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) { 337 hdr->end_entry = i; 338 339 if (is_vmalloc_addr((void *)vaddr)) { 340 pfn = vmalloc_to_pfn((void *)vaddr); 341 use_large_entry = false; 342 } else { 343 pfn = __pa(vaddr) >> PAGE_SHIFT; 344 use_large_entry = true; 345 } 346 347 e->gfn = pfn; 348 e->operation = op; 349 350 if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) && 351 (vaddr_end - vaddr) >= PMD_SIZE) { 352 e->pagesize = RMP_PG_SIZE_2M; 353 vaddr += PMD_SIZE; 354 } else { 355 e->pagesize = RMP_PG_SIZE_4K; 356 vaddr += PAGE_SIZE; 357 } 358 359 e++; 360 i++; 361 } 362 363 /* Page validation must be rescinded before changing to shared */ 364 if (op == SNP_PAGE_STATE_SHARED) 365 pvalidate_pages(data); 366 367 local_irq_save(flags); 368 369 if (sev_cfg.ghcbs_initialized) 370 ghcb = __sev_get_ghcb(&state); 371 else 372 ghcb = boot_ghcb; 373 374 /* Invoke the hypervisor to perform the page state changes */ 375 if (!ghcb || vmgexit_psc(ghcb, data)) 376 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); 377 378 if (sev_cfg.ghcbs_initialized) 379 __sev_put_ghcb(&state); 380 381 local_irq_restore(flags); 382 383 /* Page validation must be performed after changing to private */ 384 if (op == SNP_PAGE_STATE_PRIVATE) 385 pvalidate_pages(data); 386 387 return vaddr; 388 } 389 390 static void set_pages_state(unsigned long vaddr, unsigned long npages, int op) 391 { 392 struct snp_psc_desc desc; 393 unsigned long vaddr_end; 394 395 /* Use the MSR protocol when a GHCB is not available. 
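	 * This covers two cases: very early boot, before setup_ghcb() has
	 * installed boot_ghcb, and late kexec teardown, after
	 * snp_kexec_finish() has cleared it. The MSR protocol can only
	 * convert a single 4K page per hypervisor request, so it is used
	 * only when a GHCB is unavailable.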
*/ 396 if (!boot_ghcb) { 397 struct psc_desc d = { op, svsm_get_caa(), svsm_get_caa_pa() }; 398 399 return early_set_pages_state(vaddr, __pa(vaddr), npages, &d); 400 } 401 402 vaddr = vaddr & PAGE_MASK; 403 vaddr_end = vaddr + (npages << PAGE_SHIFT); 404 405 while (vaddr < vaddr_end) 406 vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op); 407 } 408 409 void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) 410 { 411 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 412 return; 413 414 set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED); 415 } 416 417 void snp_set_memory_private(unsigned long vaddr, unsigned long npages) 418 { 419 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 420 return; 421 422 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); 423 } 424 425 void snp_accept_memory(phys_addr_t start, phys_addr_t end) 426 { 427 unsigned long vaddr, npages; 428 429 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 430 return; 431 432 vaddr = (unsigned long)__va(start); 433 npages = (end - start) >> PAGE_SHIFT; 434 435 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); 436 } 437 438 static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id) 439 { 440 bool create = event != SVM_VMGEXIT_AP_DESTROY; 441 struct ghcb_state state; 442 unsigned long flags; 443 struct ghcb *ghcb; 444 int ret = 0; 445 446 local_irq_save(flags); 447 448 ghcb = __sev_get_ghcb(&state); 449 450 vc_ghcb_invalidate(ghcb); 451 452 if (create) 453 ghcb_set_rax(ghcb, vmsa->sev_features); 454 455 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); 456 ghcb_set_sw_exit_info_1(ghcb, 457 ((u64)apic_id << 32) | 458 ((u64)snp_vmpl << 16) | 459 event); 460 ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); 461 462 sev_es_wr_ghcb_msr(__pa(ghcb)); 463 VMGEXIT(); 464 465 if (!ghcb_sw_exit_info_1_is_valid(ghcb) || 466 lower_32_bits(ghcb->save.sw_exit_info_1)) { 467 pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY")); 468 ret = -EINVAL; 469 } 470 471 __sev_put_ghcb(&state); 472 473 local_irq_restore(flags); 474 475 return ret; 476 } 477 478 static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa) 479 { 480 int ret; 481 482 if (snp_vmpl) { 483 struct svsm_call call = {}; 484 unsigned long flags; 485 486 local_irq_save(flags); 487 488 call.caa = this_cpu_read(svsm_caa); 489 call.rcx = __pa(va); 490 491 if (make_vmsa) { 492 /* Protocol 0, Call ID 2 */ 493 call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU); 494 call.rdx = __pa(caa); 495 call.r8 = apic_id; 496 } else { 497 /* Protocol 0, Call ID 3 */ 498 call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU); 499 } 500 501 ret = svsm_perform_call_protocol(&call); 502 503 local_irq_restore(flags); 504 } else { 505 /* 506 * If the kernel runs at VMPL0, it can change the VMSA 507 * bit for a page using the RMPADJUST instruction. 508 * However, for the instruction to succeed it must 509 * target the permissions of a lesser privileged (higher 510 * numbered) VMPL level, so use VMPL1. 
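		 * In the attrs value below, bits 7:0 select the target VMPL
		 * (1) and RMPADJUST_VMSA_PAGE_BIT marks (or clears) the page
		 * as a VMSA page.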
511 */ 512 u64 attrs = 1; 513 514 if (make_vmsa) 515 attrs |= RMPADJUST_VMSA_PAGE_BIT; 516 517 ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); 518 } 519 520 return ret; 521 } 522 523 static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id) 524 { 525 int err; 526 527 err = snp_set_vmsa(vmsa, NULL, apic_id, false); 528 if (err) 529 pr_err("clear VMSA page failed (%u), leaking page\n", err); 530 else 531 free_page((unsigned long)vmsa); 532 } 533 534 static void set_pte_enc(pte_t *kpte, int level, void *va) 535 { 536 struct pte_enc_desc d = { 537 .kpte = kpte, 538 .pte_level = level, 539 .va = va, 540 .encrypt = true 541 }; 542 543 prepare_pte_enc(&d); 544 set_pte_enc_mask(kpte, d.pfn, d.new_pgprot); 545 } 546 547 static void unshare_all_memory(void) 548 { 549 unsigned long addr, end, size, ghcb; 550 struct sev_es_runtime_data *data; 551 unsigned int npages, level; 552 bool skipped_addr; 553 pte_t *pte; 554 int cpu; 555 556 /* Unshare the direct mapping. */ 557 addr = PAGE_OFFSET; 558 end = PAGE_OFFSET + get_max_mapped(); 559 560 while (addr < end) { 561 pte = lookup_address(addr, &level); 562 size = page_level_size(level); 563 npages = size / PAGE_SIZE; 564 skipped_addr = false; 565 566 if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) { 567 addr += size; 568 continue; 569 } 570 571 /* 572 * Ensure that all the per-CPU GHCBs are made private at the 573 * end of the unsharing loop so that the switch to the slower 574 * MSR protocol happens last. 575 */ 576 for_each_possible_cpu(cpu) { 577 data = per_cpu(runtime_data, cpu); 578 ghcb = (unsigned long)&data->ghcb_page; 579 580 /* Handle the case of a huge page containing the GHCB page */ 581 if (addr <= ghcb && ghcb < addr + size) { 582 skipped_addr = true; 583 break; 584 } 585 } 586 587 if (!skipped_addr) { 588 set_pte_enc(pte, level, (void *)addr); 589 snp_set_memory_private(addr, npages); 590 } 591 addr += size; 592 } 593 594 /* Unshare all bss decrypted memory. */ 595 addr = (unsigned long)__start_bss_decrypted; 596 end = (unsigned long)__start_bss_decrypted_unused; 597 npages = (end - addr) >> PAGE_SHIFT; 598 599 for (; addr < end; addr += PAGE_SIZE) { 600 pte = lookup_address(addr, &level); 601 if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) 602 continue; 603 604 set_pte_enc(pte, level, (void *)addr); 605 } 606 addr = (unsigned long)__start_bss_decrypted; 607 snp_set_memory_private(addr, npages); 608 609 __flush_tlb_all(); 610 } 611 612 /* Stop new private<->shared conversions */ 613 void snp_kexec_begin(void) 614 { 615 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 616 return; 617 618 if (!IS_ENABLED(CONFIG_KEXEC_CORE)) 619 return; 620 621 /* 622 * Crash kernel ends up here with interrupts disabled: can't wait for 623 * conversions to finish. 624 * 625 * If race happened, just report and proceed. 626 */ 627 if (!set_memory_enc_stop_conversion()) 628 pr_warn("Failed to stop shared<->private conversions\n"); 629 } 630 631 /* 632 * Shutdown all APs except the one handling kexec/kdump and clearing 633 * the VMSA tag on AP's VMSA pages as they are not being used as 634 * VMSA page anymore. 635 */ 636 static void shutdown_all_aps(void) 637 { 638 struct sev_es_save_area *vmsa; 639 int apic_id, this_cpu, cpu; 640 641 this_cpu = get_cpu(); 642 643 /* 644 * APs are already in HLT loop when enc_kexec_finish() callback 645 * is invoked. 
646 */ 647 for_each_present_cpu(cpu) { 648 vmsa = per_cpu(sev_vmsa, cpu); 649 650 /* 651 * The BSP or offlined APs do not have guest allocated VMSA 652 * and there is no need to clear the VMSA tag for this page. 653 */ 654 if (!vmsa) 655 continue; 656 657 /* 658 * Cannot clear the VMSA tag for the currently running vCPU. 659 */ 660 if (this_cpu == cpu) { 661 unsigned long pa; 662 struct page *p; 663 664 pa = __pa(vmsa); 665 /* 666 * Mark the VMSA page of the running vCPU as offline 667 * so that is excluded and not touched by makedumpfile 668 * while generating vmcore during kdump. 669 */ 670 p = pfn_to_online_page(pa >> PAGE_SHIFT); 671 if (p) 672 __SetPageOffline(p); 673 continue; 674 } 675 676 apic_id = cpuid_to_apicid[cpu]; 677 678 /* 679 * Issue AP destroy to ensure AP gets kicked out of guest mode 680 * to allow using RMPADJUST to remove the VMSA tag on it's 681 * VMSA page. 682 */ 683 vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id); 684 snp_cleanup_vmsa(vmsa, apic_id); 685 } 686 687 put_cpu(); 688 } 689 690 void snp_kexec_finish(void) 691 { 692 struct sev_es_runtime_data *data; 693 unsigned long size, addr; 694 unsigned int level, cpu; 695 struct ghcb *ghcb; 696 pte_t *pte; 697 698 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 699 return; 700 701 if (!IS_ENABLED(CONFIG_KEXEC_CORE)) 702 return; 703 704 shutdown_all_aps(); 705 706 unshare_all_memory(); 707 708 /* 709 * Switch to using the MSR protocol to change per-CPU GHCBs to 710 * private. All the per-CPU GHCBs have been switched back to private, 711 * so can't do any more GHCB calls to the hypervisor beyond this point 712 * until the kexec'ed kernel starts running. 713 */ 714 boot_ghcb = NULL; 715 sev_cfg.ghcbs_initialized = false; 716 717 for_each_possible_cpu(cpu) { 718 data = per_cpu(runtime_data, cpu); 719 ghcb = &data->ghcb_page; 720 pte = lookup_address((unsigned long)ghcb, &level); 721 size = page_level_size(level); 722 /* Handle the case of a huge page containing the GHCB page */ 723 addr = (unsigned long)ghcb & page_level_mask(level); 724 set_pte_enc(pte, level, (void *)addr); 725 snp_set_memory_private(addr, (size / PAGE_SIZE)); 726 } 727 } 728 729 #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK) 730 #define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK) 731 #define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK) 732 733 #define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2) 734 #define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3) 735 736 static void *snp_alloc_vmsa_page(int cpu) 737 { 738 struct page *p; 739 740 /* 741 * Allocate VMSA page to work around the SNP erratum where the CPU will 742 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB) 743 * collides with the RMP entry of VMSA page. The recommended workaround 744 * is to not use a large page. 745 * 746 * Allocate an 8k page which is also 8k-aligned. 747 */ 748 p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1); 749 if (!p) 750 return NULL; 751 752 split_page(p, 1); 753 754 /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */ 755 __free_page(p); 756 757 return page_address(p + 1); 758 } 759 760 static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned int cpu) 761 { 762 struct sev_es_save_area *cur_vmsa, *vmsa; 763 struct svsm_ca *caa; 764 u8 sipi_vector; 765 int ret; 766 u64 cr4; 767 768 /* 769 * The hypervisor SNP feature support check has happened earlier, just check 770 * the AP_CREATION one here. 
771 */ 772 if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION)) 773 return -EOPNOTSUPP; 774 775 /* 776 * Verify the desired start IP against the known trampoline start IP 777 * to catch any future new trampolines that may be introduced that 778 * would require a new protected guest entry point. 779 */ 780 if (WARN_ONCE(start_ip != real_mode_header->trampoline_start, 781 "Unsupported SNP start_ip: %lx\n", start_ip)) 782 return -EINVAL; 783 784 /* Override start_ip with known protected guest start IP */ 785 start_ip = real_mode_header->sev_es_trampoline_start; 786 cur_vmsa = per_cpu(sev_vmsa, cpu); 787 788 /* 789 * A new VMSA is created each time because there is no guarantee that 790 * the current VMSA is the kernels or that the vCPU is not running. If 791 * an attempt was done to use the current VMSA with a running vCPU, a 792 * #VMEXIT of that vCPU would wipe out all of the settings being done 793 * here. 794 */ 795 vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu); 796 if (!vmsa) 797 return -ENOMEM; 798 799 /* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */ 800 caa = per_cpu(svsm_caa, cpu); 801 802 /* CR4 should maintain the MCE value */ 803 cr4 = native_read_cr4() & X86_CR4_MCE; 804 805 /* Set the CS value based on the start_ip converted to a SIPI vector */ 806 sipi_vector = (start_ip >> 12); 807 vmsa->cs.base = sipi_vector << 12; 808 vmsa->cs.limit = AP_INIT_CS_LIMIT; 809 vmsa->cs.attrib = INIT_CS_ATTRIBS; 810 vmsa->cs.selector = sipi_vector << 8; 811 812 /* Set the RIP value based on start_ip */ 813 vmsa->rip = start_ip & 0xfff; 814 815 /* Set AP INIT defaults as documented in the APM */ 816 vmsa->ds.limit = AP_INIT_DS_LIMIT; 817 vmsa->ds.attrib = INIT_DS_ATTRIBS; 818 vmsa->es = vmsa->ds; 819 vmsa->fs = vmsa->ds; 820 vmsa->gs = vmsa->ds; 821 vmsa->ss = vmsa->ds; 822 823 vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT; 824 vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT; 825 vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS; 826 vmsa->idtr.limit = AP_INIT_IDTR_LIMIT; 827 vmsa->tr.limit = AP_INIT_TR_LIMIT; 828 vmsa->tr.attrib = INIT_TR_ATTRIBS; 829 830 vmsa->cr4 = cr4; 831 vmsa->cr0 = AP_INIT_CR0_DEFAULT; 832 vmsa->dr7 = DR7_RESET_VALUE; 833 vmsa->dr6 = AP_INIT_DR6_DEFAULT; 834 vmsa->rflags = AP_INIT_RFLAGS_DEFAULT; 835 vmsa->g_pat = AP_INIT_GPAT_DEFAULT; 836 vmsa->xcr0 = AP_INIT_XCR0_DEFAULT; 837 vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT; 838 vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT; 839 vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT; 840 841 if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) 842 vmsa->vintr_ctrl |= V_GIF_MASK | V_NMI_ENABLE_MASK; 843 844 /* SVME must be set. */ 845 vmsa->efer = EFER_SVME; 846 847 /* 848 * Set the SNP-specific fields for this VMSA: 849 * VMPL level 850 * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits) 851 */ 852 vmsa->vmpl = snp_vmpl; 853 vmsa->sev_features = sev_status >> 2; 854 855 /* Populate AP's TSC scale/offset to get accurate TSC values. 
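	 * The scale/offset pair was fetched once by the BSP via the SNP
	 * TSC_INFO guest message (see snp_get_tsc_info()) and is replicated
	 * into every AP VMSA.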
*/ 856 if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) { 857 vmsa->tsc_scale = snp_tsc_scale; 858 vmsa->tsc_offset = snp_tsc_offset; 859 } 860 861 /* Switch the page over to a VMSA page now that it is initialized */ 862 ret = snp_set_vmsa(vmsa, caa, apic_id, true); 863 if (ret) { 864 pr_err("set VMSA page failed (%u)\n", ret); 865 free_page((unsigned long)vmsa); 866 867 return -EINVAL; 868 } 869 870 /* Issue VMGEXIT AP Creation NAE event */ 871 ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id); 872 if (ret) { 873 snp_cleanup_vmsa(vmsa, apic_id); 874 vmsa = NULL; 875 } 876 877 /* Free up any previous VMSA page */ 878 if (cur_vmsa) 879 snp_cleanup_vmsa(cur_vmsa, apic_id); 880 881 /* Record the current VMSA page */ 882 per_cpu(sev_vmsa, cpu) = vmsa; 883 884 return ret; 885 } 886 887 void __init snp_set_wakeup_secondary_cpu(void) 888 { 889 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 890 return; 891 892 /* 893 * Always set this override if SNP is enabled. This makes it the 894 * required method to start APs under SNP. If the hypervisor does 895 * not support AP creation, then no APs will be started. 896 */ 897 apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit); 898 } 899 900 int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh) 901 { 902 u16 startup_cs, startup_ip; 903 phys_addr_t jump_table_pa; 904 u64 jump_table_addr; 905 u16 __iomem *jump_table; 906 907 jump_table_addr = get_jump_table_addr(); 908 909 /* On UP guests there is no jump table so this is not a failure */ 910 if (!jump_table_addr) 911 return 0; 912 913 /* Check if AP Jump Table is page-aligned */ 914 if (jump_table_addr & ~PAGE_MASK) 915 return -EINVAL; 916 917 jump_table_pa = jump_table_addr & PAGE_MASK; 918 919 startup_cs = (u16)(rmh->trampoline_start >> 4); 920 startup_ip = (u16)(rmh->sev_es_trampoline_start - 921 rmh->trampoline_start); 922 923 jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE); 924 if (!jump_table) 925 return -EIO; 926 927 writew(startup_ip, &jump_table[0]); 928 writew(startup_cs, &jump_table[1]); 929 930 iounmap(jump_table); 931 932 return 0; 933 } 934 935 /* 936 * This is needed by the OVMF UEFI firmware which will use whatever it finds in 937 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu 938 * runtime GHCBs used by the kernel are also mapped in the EFI page-table. 939 * 940 * When running under SVSM the CA page is needed too, so map it as well. 
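 * The GHCB pages are already shared with the hypervisor and are mapped
 * without the encryption bit, while the CA page stays private and is mapped
 * encrypted, hence the two different pgprot values below.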
 */
int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd)
{
	unsigned long address, pflags, pflags_enc;
	struct sev_es_runtime_data *data;
	int cpu;
	u64 pfn;

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return 0;

	pflags = _PAGE_NX | _PAGE_RW;
	pflags_enc = cc_mkenc(pflags);

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);

		address = __pa(&data->ghcb_page);
		pfn = address >> PAGE_SHIFT;

		if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
			return 1;

		if (snp_vmpl) {
			address = per_cpu(svsm_caa_pa, cpu);
			if (!address)
				return 1;

			pfn = address >> PAGE_SHIFT;
			if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags_enc))
				return 1;
		}
	}

	return 0;
}

u64 savic_ghcb_msr_read(u32 reg)
{
	u64 msr = APIC_BASE_MSR + (reg >> 4);
	struct pt_regs regs = { .cx = msr };
	struct es_em_ctxt ctxt = { .regs = &regs };
	struct ghcb_state state;
	enum es_result res;
	struct ghcb *ghcb;

	guard(irqsave)();

	ghcb = __sev_get_ghcb(&state);
	vc_ghcb_invalidate(ghcb);

	res = __vc_handle_msr(ghcb, &ctxt, false);
	if (res != ES_OK) {
		pr_err("Secure AVIC MSR (0x%llx) read returned error (%d)\n", msr, res);
		/* MSR read failures are treated as fatal errors */
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL);
	}

	__sev_put_ghcb(&state);

	return regs.ax | regs.dx << 32;
}

void savic_ghcb_msr_write(u32 reg, u64 value)
{
	u64 msr = APIC_BASE_MSR + (reg >> 4);
	struct pt_regs regs = {
		.cx = msr,
		.ax = lower_32_bits(value),
		.dx = upper_32_bits(value)
	};
	struct es_em_ctxt ctxt = { .regs = &regs };
	struct ghcb_state state;
	enum es_result res;
	struct ghcb *ghcb;

	guard(irqsave)();

	ghcb = __sev_get_ghcb(&state);
	vc_ghcb_invalidate(ghcb);

	res = __vc_handle_msr(ghcb, &ctxt, true);
	if (res != ES_OK) {
		pr_err("Secure AVIC MSR (0x%llx) write returned error (%d)\n", msr, res);
		/* MSR writes should never fail.
Any failure is fatal error for SNP guest */ 1026 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); 1027 } 1028 1029 __sev_put_ghcb(&state); 1030 } 1031 1032 enum es_result savic_register_gpa(u64 gpa) 1033 { 1034 struct ghcb_state state; 1035 struct es_em_ctxt ctxt; 1036 enum es_result res; 1037 struct ghcb *ghcb; 1038 1039 guard(irqsave)(); 1040 1041 ghcb = __sev_get_ghcb(&state); 1042 vc_ghcb_invalidate(ghcb); 1043 1044 ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA); 1045 ghcb_set_rbx(ghcb, gpa); 1046 res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC, 1047 SVM_VMGEXIT_SAVIC_REGISTER_GPA, 0); 1048 1049 __sev_put_ghcb(&state); 1050 1051 return res; 1052 } 1053 1054 enum es_result savic_unregister_gpa(u64 *gpa) 1055 { 1056 struct ghcb_state state; 1057 struct es_em_ctxt ctxt; 1058 enum es_result res; 1059 struct ghcb *ghcb; 1060 1061 guard(irqsave)(); 1062 1063 ghcb = __sev_get_ghcb(&state); 1064 vc_ghcb_invalidate(ghcb); 1065 1066 ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA); 1067 res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC, 1068 SVM_VMGEXIT_SAVIC_UNREGISTER_GPA, 0); 1069 if (gpa && res == ES_OK) 1070 *gpa = ghcb->save.rbx; 1071 1072 __sev_put_ghcb(&state); 1073 1074 return res; 1075 } 1076 1077 static void snp_register_per_cpu_ghcb(void) 1078 { 1079 struct sev_es_runtime_data *data; 1080 struct ghcb *ghcb; 1081 1082 data = this_cpu_read(runtime_data); 1083 ghcb = &data->ghcb_page; 1084 1085 snp_register_ghcb_early(__pa(ghcb)); 1086 } 1087 1088 void setup_ghcb(void) 1089 { 1090 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) 1091 return; 1092 1093 /* 1094 * Check whether the runtime #VC exception handler is active. It uses 1095 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling(). 1096 * 1097 * If SNP is active, register the per-CPU GHCB page so that the runtime 1098 * exception handler can use it. 1099 */ 1100 if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) { 1101 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1102 snp_register_per_cpu_ghcb(); 1103 1104 sev_cfg.ghcbs_initialized = true; 1105 1106 return; 1107 } 1108 1109 /* 1110 * Make sure the hypervisor talks a supported protocol. 1111 * This gets called only in the BSP boot phase. 1112 */ 1113 if (!sev_es_negotiate_protocol()) 1114 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 1115 1116 /* 1117 * Clear the boot_ghcb. The first exception comes in before the bss 1118 * section is cleared. 1119 */ 1120 memset(&boot_ghcb_page, 0, PAGE_SIZE); 1121 1122 /* Alright - Make the boot-ghcb public */ 1123 boot_ghcb = &boot_ghcb_page; 1124 1125 /* SNP guest requires that GHCB GPA must be registered. */ 1126 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1127 snp_register_ghcb_early(__pa(&boot_ghcb_page)); 1128 } 1129 1130 #ifdef CONFIG_HOTPLUG_CPU 1131 static void sev_es_ap_hlt_loop(void) 1132 { 1133 struct ghcb_state state; 1134 struct ghcb *ghcb; 1135 1136 ghcb = __sev_get_ghcb(&state); 1137 1138 while (true) { 1139 vc_ghcb_invalidate(ghcb); 1140 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP); 1141 ghcb_set_sw_exit_info_1(ghcb, 0); 1142 ghcb_set_sw_exit_info_2(ghcb, 0); 1143 1144 sev_es_wr_ghcb_msr(__pa(ghcb)); 1145 VMGEXIT(); 1146 1147 /* Wakeup signal? */ 1148 if (ghcb_sw_exit_info_2_is_valid(ghcb) && 1149 ghcb->save.sw_exit_info_2) 1150 break; 1151 } 1152 1153 __sev_put_ghcb(&state); 1154 } 1155 1156 /* 1157 * Play_dead handler when running under SEV-ES. This is needed because 1158 * the hypervisor can't deliver an SIPI request to restart the AP. 
1159 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the 1160 * hypervisor wakes it up again. 1161 */ 1162 static void sev_es_play_dead(void) 1163 { 1164 play_dead_common(); 1165 1166 /* IRQs now disabled */ 1167 1168 sev_es_ap_hlt_loop(); 1169 1170 /* 1171 * If we get here, the VCPU was woken up again. Jump to CPU 1172 * startup code to get it back online. 1173 */ 1174 soft_restart_cpu(); 1175 } 1176 #else /* CONFIG_HOTPLUG_CPU */ 1177 #define sev_es_play_dead native_play_dead 1178 #endif /* CONFIG_HOTPLUG_CPU */ 1179 1180 #ifdef CONFIG_SMP 1181 static void __init sev_es_setup_play_dead(void) 1182 { 1183 smp_ops.play_dead = sev_es_play_dead; 1184 } 1185 #else 1186 static inline void sev_es_setup_play_dead(void) { } 1187 #endif 1188 1189 static void __init alloc_runtime_data(int cpu) 1190 { 1191 struct sev_es_runtime_data *data; 1192 1193 data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu)); 1194 if (!data) 1195 panic("Can't allocate SEV-ES runtime data"); 1196 1197 per_cpu(runtime_data, cpu) = data; 1198 1199 if (snp_vmpl) { 1200 struct svsm_ca *caa; 1201 1202 /* Allocate the SVSM CA page if an SVSM is present */ 1203 caa = cpu ? memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE) 1204 : &boot_svsm_ca_page; 1205 1206 per_cpu(svsm_caa, cpu) = caa; 1207 per_cpu(svsm_caa_pa, cpu) = __pa(caa); 1208 } 1209 } 1210 1211 static void __init init_ghcb(int cpu) 1212 { 1213 struct sev_es_runtime_data *data; 1214 int err; 1215 1216 data = per_cpu(runtime_data, cpu); 1217 1218 err = early_set_memory_decrypted((unsigned long)&data->ghcb_page, 1219 sizeof(data->ghcb_page)); 1220 if (err) 1221 panic("Can't map GHCBs unencrypted"); 1222 1223 memset(&data->ghcb_page, 0, sizeof(data->ghcb_page)); 1224 1225 data->ghcb_active = false; 1226 data->backup_ghcb_active = false; 1227 } 1228 1229 void __init sev_es_init_vc_handling(void) 1230 { 1231 int cpu; 1232 1233 BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE); 1234 1235 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) 1236 return; 1237 1238 if (!sev_es_check_cpu_features()) 1239 panic("SEV-ES CPU Features missing"); 1240 1241 /* 1242 * SNP is supported in v2 of the GHCB spec which mandates support for HV 1243 * features. 1244 */ 1245 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) { 1246 sev_hv_features = get_hv_features(); 1247 1248 if (!(sev_hv_features & GHCB_HV_FT_SNP)) 1249 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); 1250 } 1251 1252 /* Initialize per-cpu GHCB pages */ 1253 for_each_possible_cpu(cpu) { 1254 alloc_runtime_data(cpu); 1255 init_ghcb(cpu); 1256 } 1257 1258 if (snp_vmpl) 1259 sev_cfg.use_cas = true; 1260 1261 sev_es_setup_play_dead(); 1262 1263 /* Secondary CPUs use the runtime #VC handler */ 1264 initial_vc_handler = (unsigned long)kernel_exc_vmm_communication; 1265 } 1266 1267 /* 1268 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are 1269 * enabled, as the alternative (fallback) logic for DMI probing in the legacy 1270 * ROM region can cause a crash since this region is not pre-validated. 
1271 */ 1272 void __init snp_dmi_setup(void) 1273 { 1274 if (efi_enabled(EFI_CONFIG_TABLES)) 1275 dmi_setup(); 1276 } 1277 1278 static void dump_cpuid_table(void) 1279 { 1280 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); 1281 int i = 0; 1282 1283 pr_info("count=%d reserved=0x%x reserved2=0x%llx\n", 1284 cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2); 1285 1286 for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) { 1287 const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; 1288 1289 pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n", 1290 i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx, 1291 fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved); 1292 } 1293 } 1294 1295 /* 1296 * It is useful from an auditing/testing perspective to provide an easy way 1297 * for the guest owner to know that the CPUID table has been initialized as 1298 * expected, but that initialization happens too early in boot to print any 1299 * sort of indicator, and there's not really any other good place to do it, 1300 * so do it here. 1301 * 1302 * If running as an SNP guest, report the current VM privilege level (VMPL). 1303 */ 1304 static int __init report_snp_info(void) 1305 { 1306 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); 1307 1308 if (cpuid_table->count) { 1309 pr_info("Using SNP CPUID table, %d entries present.\n", 1310 cpuid_table->count); 1311 1312 if (sev_cfg.debug) 1313 dump_cpuid_table(); 1314 } 1315 1316 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1317 pr_info("SNP running at VMPL%u.\n", snp_vmpl); 1318 1319 return 0; 1320 } 1321 arch_initcall(report_snp_info); 1322 1323 static int snp_issue_guest_request(struct snp_guest_req *req) 1324 { 1325 struct snp_req_data *input = &req->input; 1326 struct ghcb_state state; 1327 struct es_em_ctxt ctxt; 1328 unsigned long flags; 1329 struct ghcb *ghcb; 1330 int ret; 1331 1332 req->exitinfo2 = SEV_RET_NO_FW_CALL; 1333 1334 /* 1335 * __sev_get_ghcb() needs to run with IRQs disabled because it is using 1336 * a per-CPU GHCB. 
1337 */ 1338 local_irq_save(flags); 1339 1340 ghcb = __sev_get_ghcb(&state); 1341 if (!ghcb) { 1342 ret = -EIO; 1343 goto e_restore_irq; 1344 } 1345 1346 vc_ghcb_invalidate(ghcb); 1347 1348 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { 1349 ghcb_set_rax(ghcb, input->data_gpa); 1350 ghcb_set_rbx(ghcb, input->data_npages); 1351 } 1352 1353 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa); 1354 if (ret) 1355 goto e_put; 1356 1357 req->exitinfo2 = ghcb->save.sw_exit_info_2; 1358 switch (req->exitinfo2) { 1359 case 0: 1360 break; 1361 1362 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY): 1363 ret = -EAGAIN; 1364 break; 1365 1366 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN): 1367 /* Number of expected pages are returned in RBX */ 1368 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { 1369 input->data_npages = ghcb_get_rbx(ghcb); 1370 ret = -ENOSPC; 1371 break; 1372 } 1373 fallthrough; 1374 default: 1375 ret = -EIO; 1376 break; 1377 } 1378 1379 e_put: 1380 __sev_put_ghcb(&state); 1381 e_restore_irq: 1382 local_irq_restore(flags); 1383 1384 return ret; 1385 } 1386 1387 static struct platform_device sev_guest_device = { 1388 .name = "sev-guest", 1389 .id = -1, 1390 }; 1391 1392 static struct platform_device tpm_svsm_device = { 1393 .name = "tpm-svsm", 1394 .id = -1, 1395 }; 1396 1397 static int __init snp_init_platform_device(void) 1398 { 1399 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1400 return -ENODEV; 1401 1402 if (platform_device_register(&sev_guest_device)) 1403 return -ENODEV; 1404 1405 if (snp_svsm_vtpm_probe() && 1406 platform_device_register(&tpm_svsm_device)) 1407 return -ENODEV; 1408 1409 pr_info("SNP guest platform devices initialized.\n"); 1410 return 0; 1411 } 1412 device_initcall(snp_init_platform_device); 1413 1414 void sev_show_status(void) 1415 { 1416 int i; 1417 1418 pr_info("Status: "); 1419 for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) { 1420 if (sev_status & BIT_ULL(i)) { 1421 if (!sev_status_feat_names[i]) 1422 continue; 1423 1424 pr_cont("%s ", sev_status_feat_names[i]); 1425 } 1426 } 1427 pr_cont("\n"); 1428 } 1429 1430 #ifdef CONFIG_SYSFS 1431 static ssize_t vmpl_show(struct kobject *kobj, 1432 struct kobj_attribute *attr, char *buf) 1433 { 1434 return sysfs_emit(buf, "%d\n", snp_vmpl); 1435 } 1436 1437 static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl); 1438 1439 static struct attribute *vmpl_attrs[] = { 1440 &vmpl_attr.attr, 1441 NULL 1442 }; 1443 1444 static struct attribute_group sev_attr_group = { 1445 .attrs = vmpl_attrs, 1446 }; 1447 1448 static int __init sev_sysfs_init(void) 1449 { 1450 struct kobject *sev_kobj; 1451 struct device *dev_root; 1452 int ret; 1453 1454 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1455 return -ENODEV; 1456 1457 dev_root = bus_get_dev_root(&cpu_subsys); 1458 if (!dev_root) 1459 return -ENODEV; 1460 1461 sev_kobj = kobject_create_and_add("sev", &dev_root->kobj); 1462 put_device(dev_root); 1463 1464 if (!sev_kobj) 1465 return -ENOMEM; 1466 1467 ret = sysfs_create_group(sev_kobj, &sev_attr_group); 1468 if (ret) 1469 kobject_put(sev_kobj); 1470 1471 return ret; 1472 } 1473 arch_initcall(sev_sysfs_init); 1474 #endif // CONFIG_SYSFS 1475 1476 static void free_shared_pages(void *buf, size_t sz) 1477 { 1478 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; 1479 int ret; 1480 1481 if (!buf) 1482 return; 1483 1484 ret = set_memory_encrypted((unsigned long)buf, npages); 1485 if (ret) { 1486 WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); 1487 return; 1488 } 
1489 1490 __free_pages(virt_to_page(buf), get_order(sz)); 1491 } 1492 1493 static void *alloc_shared_pages(size_t sz) 1494 { 1495 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; 1496 struct page *page; 1497 int ret; 1498 1499 page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz)); 1500 if (!page) 1501 return NULL; 1502 1503 ret = set_memory_decrypted((unsigned long)page_address(page), npages); 1504 if (ret) { 1505 pr_err("failed to mark page shared, ret=%d\n", ret); 1506 __free_pages(page, get_order(sz)); 1507 return NULL; 1508 } 1509 1510 return page_address(page); 1511 } 1512 1513 static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno) 1514 { 1515 u8 *key = NULL; 1516 1517 switch (id) { 1518 case 0: 1519 *seqno = &secrets->os_area.msg_seqno_0; 1520 key = secrets->vmpck0; 1521 break; 1522 case 1: 1523 *seqno = &secrets->os_area.msg_seqno_1; 1524 key = secrets->vmpck1; 1525 break; 1526 case 2: 1527 *seqno = &secrets->os_area.msg_seqno_2; 1528 key = secrets->vmpck2; 1529 break; 1530 case 3: 1531 *seqno = &secrets->os_area.msg_seqno_3; 1532 key = secrets->vmpck3; 1533 break; 1534 default: 1535 break; 1536 } 1537 1538 return key; 1539 } 1540 1541 static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen) 1542 { 1543 struct aesgcm_ctx *ctx; 1544 1545 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 1546 if (!ctx) 1547 return NULL; 1548 1549 if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) { 1550 pr_err("Crypto context initialization failed\n"); 1551 kfree(ctx); 1552 return NULL; 1553 } 1554 1555 return ctx; 1556 } 1557 1558 int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) 1559 { 1560 /* Adjust the default VMPCK key based on the executing VMPL level */ 1561 if (vmpck_id == -1) 1562 vmpck_id = snp_vmpl; 1563 1564 mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno); 1565 if (!mdesc->vmpck) { 1566 pr_err("Invalid VMPCK%d communication key\n", vmpck_id); 1567 return -EINVAL; 1568 } 1569 1570 /* Verify that VMPCK is not zero. */ 1571 if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) { 1572 pr_err("Empty VMPCK%d communication key\n", vmpck_id); 1573 return -EINVAL; 1574 } 1575 1576 mdesc->vmpck_id = vmpck_id; 1577 1578 mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN); 1579 if (!mdesc->ctx) 1580 return -ENOMEM; 1581 1582 return 0; 1583 } 1584 EXPORT_SYMBOL_GPL(snp_msg_init); 1585 1586 struct snp_msg_desc *snp_msg_alloc(void) 1587 { 1588 struct snp_msg_desc *mdesc; 1589 void __iomem *mem; 1590 1591 BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE); 1592 1593 mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL); 1594 if (!mdesc) 1595 return ERR_PTR(-ENOMEM); 1596 1597 mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE); 1598 if (!mem) 1599 goto e_free_mdesc; 1600 1601 mdesc->secrets = (__force struct snp_secrets_page *)mem; 1602 1603 /* Allocate the shared page used for the request and response message. 
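	 * These pages are mapped decrypted so the hypervisor can access
	 * them; the plaintext working copies live in
	 * mdesc->secret_request/secret_response, which remain in encrypted
	 * memory.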
*/ 1604 mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg)); 1605 if (!mdesc->request) 1606 goto e_unmap; 1607 1608 mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg)); 1609 if (!mdesc->response) 1610 goto e_free_request; 1611 1612 return mdesc; 1613 1614 e_free_request: 1615 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); 1616 e_unmap: 1617 iounmap(mem); 1618 e_free_mdesc: 1619 kfree(mdesc); 1620 1621 return ERR_PTR(-ENOMEM); 1622 } 1623 EXPORT_SYMBOL_GPL(snp_msg_alloc); 1624 1625 void snp_msg_free(struct snp_msg_desc *mdesc) 1626 { 1627 if (!mdesc) 1628 return; 1629 1630 kfree(mdesc->ctx); 1631 free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); 1632 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); 1633 iounmap((__force void __iomem *)mdesc->secrets); 1634 1635 kfree_sensitive(mdesc); 1636 } 1637 EXPORT_SYMBOL_GPL(snp_msg_free); 1638 1639 /* Mutex to serialize the shared buffer access and command handling. */ 1640 static DEFINE_MUTEX(snp_cmd_mutex); 1641 1642 /* 1643 * If an error is received from the host or AMD Secure Processor (ASP) there 1644 * are two options. Either retry the exact same encrypted request or discontinue 1645 * using the VMPCK. 1646 * 1647 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to 1648 * encrypt the requests. The IV for this scheme is the sequence number. GCM 1649 * cannot tolerate IV reuse. 1650 * 1651 * The ASP FW v1.51 only increments the sequence numbers on a successful 1652 * guest<->ASP back and forth and only accepts messages at its exact sequence 1653 * number. 1654 * 1655 * So if the sequence number were to be reused the encryption scheme is 1656 * vulnerable. If the sequence number were incremented for a fresh IV the ASP 1657 * will reject the request. 1658 */ 1659 static void snp_disable_vmpck(struct snp_msg_desc *mdesc) 1660 { 1661 pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n", 1662 mdesc->vmpck_id); 1663 memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN); 1664 mdesc->vmpck = NULL; 1665 } 1666 1667 static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc) 1668 { 1669 u64 count; 1670 1671 lockdep_assert_held(&snp_cmd_mutex); 1672 1673 /* Read the current message sequence counter from secrets pages */ 1674 count = *mdesc->os_area_msg_seqno; 1675 1676 return count + 1; 1677 } 1678 1679 /* Return a non-zero on success */ 1680 static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc) 1681 { 1682 u64 count = __snp_get_msg_seqno(mdesc); 1683 1684 /* 1685 * The message sequence counter for the SNP guest request is a 64-bit 1686 * value but the version 2 of GHCB specification defines a 32-bit storage 1687 * for it. If the counter exceeds the 32-bit value then return zero. 1688 * The caller should check the return value, but if the caller happens to 1689 * not check the value and use it, then the firmware treats zero as an 1690 * invalid number and will fail the message request. 1691 */ 1692 if (count >= UINT_MAX) { 1693 pr_err("request message sequence counter overflow\n"); 1694 return 0; 1695 } 1696 1697 return count; 1698 } 1699 1700 static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc) 1701 { 1702 /* 1703 * The counter is also incremented by the PSP, so increment it by 2 1704 * and save in secrets page. 
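	 * For example: the guest sends a request with sequence number N, the
	 * PSP replies with N + 1, so the next guest message must use N + 2.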
1705 */ 1706 *mdesc->os_area_msg_seqno += 2; 1707 } 1708 1709 static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req) 1710 { 1711 struct snp_guest_msg *resp_msg = &mdesc->secret_response; 1712 struct snp_guest_msg *req_msg = &mdesc->secret_request; 1713 struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr; 1714 struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr; 1715 struct aesgcm_ctx *ctx = mdesc->ctx; 1716 u8 iv[GCM_AES_IV_SIZE] = {}; 1717 1718 pr_debug("response [seqno %lld type %d version %d sz %d]\n", 1719 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version, 1720 resp_msg_hdr->msg_sz); 1721 1722 /* Copy response from shared memory to encrypted memory. */ 1723 memcpy(resp_msg, mdesc->response, sizeof(*resp_msg)); 1724 1725 /* Verify that the sequence counter is incremented by 1 */ 1726 if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1))) 1727 return -EBADMSG; 1728 1729 /* Verify response message type and version number. */ 1730 if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) || 1731 resp_msg_hdr->msg_version != req_msg_hdr->msg_version) 1732 return -EBADMSG; 1733 1734 /* 1735 * If the message size is greater than our buffer length then return 1736 * an error. 1737 */ 1738 if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz)) 1739 return -EBADMSG; 1740 1741 /* Decrypt the payload */ 1742 memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno))); 1743 if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz, 1744 &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag)) 1745 return -EBADMSG; 1746 1747 return 0; 1748 } 1749 1750 static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req) 1751 { 1752 struct snp_guest_msg *msg = &mdesc->secret_request; 1753 struct snp_guest_msg_hdr *hdr = &msg->hdr; 1754 struct aesgcm_ctx *ctx = mdesc->ctx; 1755 u8 iv[GCM_AES_IV_SIZE] = {}; 1756 1757 memset(msg, 0, sizeof(*msg)); 1758 1759 hdr->algo = SNP_AEAD_AES_256_GCM; 1760 hdr->hdr_version = MSG_HDR_VER; 1761 hdr->hdr_sz = sizeof(*hdr); 1762 hdr->msg_type = req->msg_type; 1763 hdr->msg_version = req->msg_version; 1764 hdr->msg_seqno = seqno; 1765 hdr->msg_vmpck = req->vmpck_id; 1766 hdr->msg_sz = req->req_sz; 1767 1768 /* Verify the sequence number is non-zero */ 1769 if (!hdr->msg_seqno) 1770 return -ENOSR; 1771 1772 pr_debug("request [seqno %lld type %d version %d sz %d]\n", 1773 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz); 1774 1775 if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload))) 1776 return -EBADMSG; 1777 1778 memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno))); 1779 aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo, 1780 AAD_LEN, iv, hdr->authtag); 1781 1782 return 0; 1783 } 1784 1785 static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req) 1786 { 1787 unsigned long req_start = jiffies; 1788 unsigned int override_npages = 0; 1789 u64 override_err = 0; 1790 int rc; 1791 1792 retry_request: 1793 /* 1794 * Call firmware to process the request. In this function the encrypted 1795 * message enters shared memory with the host. So after this call the 1796 * sequence number must be incremented or the VMPCK must be deleted to 1797 * prevent reuse of the IV. 
	 */
	rc = snp_issue_guest_request(req);
	switch (rc) {
	case -ENOSPC:
		/*
		 * If the extended guest request fails due to having too
		 * small of a certificate data buffer, retry the same
		 * guest request without the extended data request in
		 * order to increment the sequence number and thus avoid
		 * IV reuse.
		 */
		override_npages = req->input.data_npages;
		req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;

		/*
		 * Override the error to inform callers the given extended
		 * request buffer size was too small and give the caller the
		 * required buffer size.
		 */
		override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN);

		/*
		 * If this call to the firmware succeeds, the sequence number can
		 * be incremented allowing for continued use of the VMPCK. If
		 * there is an error reflected in the return value, this value
		 * is checked further down and the result will be the deletion
		 * of the VMPCK and the error code being propagated back to the
		 * user as an ioctl() return code.
		 */
		goto retry_request;

	/*
	 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been
	 * throttled. Retry in the driver to avoid returning and reusing the
	 * message sequence number on a different message.
	 */
	case -EAGAIN:
		if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
			rc = -ETIMEDOUT;
			break;
		}
		schedule_timeout_killable(SNP_REQ_RETRY_DELAY);
		goto retry_request;
	}

	/*
	 * Increment the message sequence number. There is no harm in doing
	 * this now because decryption uses the value stored in the response
	 * structure and any failure will wipe the VMPCK, preventing further
	 * use anyway.
	 */
	snp_inc_msg_seqno(mdesc);

	if (override_err) {
		req->exitinfo2 = override_err;

		/*
		 * If an extended guest request was issued and the supplied certificate
		 * buffer was not large enough, a standard guest request was issued to
		 * prevent IV reuse. If the standard request was successful, return -EIO
		 * back to the caller as would have originally been returned.
		 */
		if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			rc = -EIO;
	}

	if (override_npages)
		req->input.data_npages = override_npages;

	return rc;
}

int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	u64 seqno;
	int rc;

	/*
	 * enc_payload() calls aesgcm_encrypt(), which can potentially offload to HW.
	 * The offload's DMA SG list of data to encrypt has to be in linear mapping.
	 */
	if (!virt_addr_valid(req->req_buf) || !virt_addr_valid(req->resp_buf)) {
		pr_warn("AES-GCM buffers must be in linear mapping\n");
		return -EINVAL;
	}

	guard(mutex)(&snp_cmd_mutex);

	/* Check if the VMPCK is not empty */
	if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
		pr_err_ratelimited("VMPCK is disabled\n");
		return -ENOTTY;
	}

	/* Get the message sequence number and verify that it is non-zero */
	seqno = snp_get_msg_seqno(mdesc);
	if (!seqno)
		return -EIO;

	/* Clear shared memory's response for the host to populate. */
	memset(mdesc->response, 0, sizeof(struct snp_guest_msg));

	/* Encrypt the userspace provided payload in mdesc->secret_request.
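	 * The encrypted message is copied to the shared request page only
	 * after encryption succeeds, so plaintext is never exposed to the
	 * host.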
*/ 1901 rc = enc_payload(mdesc, seqno, req); 1902 if (rc) 1903 return rc; 1904 1905 /* 1906 * Write the fully encrypted request to the shared unencrypted 1907 * request page. 1908 */ 1909 memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request)); 1910 1911 /* Initialize the input address for guest request */ 1912 req->input.req_gpa = __pa(mdesc->request); 1913 req->input.resp_gpa = __pa(mdesc->response); 1914 req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0; 1915 1916 rc = __handle_guest_request(mdesc, req); 1917 if (rc) { 1918 if (rc == -EIO && 1919 req->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) 1920 return rc; 1921 1922 pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n", 1923 rc, req->exitinfo2); 1924 1925 snp_disable_vmpck(mdesc); 1926 return rc; 1927 } 1928 1929 rc = verify_and_dec_payload(mdesc, req); 1930 if (rc) { 1931 pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc); 1932 snp_disable_vmpck(mdesc); 1933 return rc; 1934 } 1935 1936 return 0; 1937 } 1938 EXPORT_SYMBOL_GPL(snp_send_guest_request); 1939 1940 static int __init snp_get_tsc_info(void) 1941 { 1942 struct snp_tsc_info_resp *tsc_resp; 1943 struct snp_tsc_info_req *tsc_req; 1944 struct snp_msg_desc *mdesc; 1945 struct snp_guest_req req = {}; 1946 int rc = -ENOMEM; 1947 1948 tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL); 1949 if (!tsc_req) 1950 return rc; 1951 1952 /* 1953 * The intermediate response buffer is used while decrypting the 1954 * response payload. Make sure that it has enough space to cover 1955 * the authtag. 1956 */ 1957 tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL); 1958 if (!tsc_resp) 1959 goto e_free_tsc_req; 1960 1961 mdesc = snp_msg_alloc(); 1962 if (IS_ERR_OR_NULL(mdesc)) 1963 goto e_free_tsc_resp; 1964 1965 rc = snp_msg_init(mdesc, snp_vmpl); 1966 if (rc) 1967 goto e_free_mdesc; 1968 1969 req.msg_version = MSG_HDR_VER; 1970 req.msg_type = SNP_MSG_TSC_INFO_REQ; 1971 req.vmpck_id = snp_vmpl; 1972 req.req_buf = tsc_req; 1973 req.req_sz = sizeof(*tsc_req); 1974 req.resp_buf = (void *)tsc_resp; 1975 req.resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN; 1976 req.exit_code = SVM_VMGEXIT_GUEST_REQUEST; 1977 1978 rc = snp_send_guest_request(mdesc, &req); 1979 if (rc) 1980 goto e_request; 1981 1982 pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n", 1983 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset, 1984 tsc_resp->tsc_factor); 1985 1986 if (!tsc_resp->status) { 1987 snp_tsc_scale = tsc_resp->tsc_scale; 1988 snp_tsc_offset = tsc_resp->tsc_offset; 1989 } else { 1990 pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status); 1991 rc = -EIO; 1992 } 1993 1994 e_request: 1995 /* The response buffer contains sensitive data, explicitly clear it. 
*/ 1996 memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN); 1997 e_free_mdesc: 1998 snp_msg_free(mdesc); 1999 e_free_tsc_resp: 2000 kfree(tsc_resp); 2001 e_free_tsc_req: 2002 kfree(tsc_req); 2003 2004 return rc; 2005 } 2006 2007 void __init snp_secure_tsc_prepare(void) 2008 { 2009 if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) 2010 return; 2011 2012 if (snp_get_tsc_info()) { 2013 pr_alert("Unable to retrieve Secure TSC info from ASP\n"); 2014 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); 2015 } 2016 2017 pr_debug("SecureTSC enabled"); 2018 } 2019 2020 static unsigned long securetsc_get_tsc_khz(void) 2021 { 2022 return snp_tsc_freq_khz; 2023 } 2024 2025 void __init snp_secure_tsc_init(void) 2026 { 2027 struct snp_secrets_page *secrets; 2028 unsigned long tsc_freq_mhz; 2029 void *mem; 2030 2031 if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) 2032 return; 2033 2034 mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE); 2035 if (!mem) { 2036 pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n"); 2037 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); 2038 } 2039 2040 secrets = (__force struct snp_secrets_page *)mem; 2041 2042 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); 2043 rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); 2044 2045 /* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */ 2046 tsc_freq_mhz &= GENMASK_ULL(17, 0); 2047 2048 snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor); 2049 2050 x86_platform.calibrate_cpu = securetsc_get_tsc_khz; 2051 x86_platform.calibrate_tsc = securetsc_get_tsc_khz; 2052 2053 early_memunmap(mem, PAGE_SIZE); 2054 } 2055
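/*
 * Illustrative sketch only (not kernel code, placeholders in angle brackets
 * are hypothetical): a consumer of the message API exported above would
 * roughly follow the same pattern as snp_get_tsc_info():
 *
 *	struct snp_guest_req req = {};
 *	struct snp_msg_desc *mdesc;
 *	int rc;
 *
 *	mdesc = snp_msg_alloc();
 *	if (IS_ERR_OR_NULL(mdesc))
 *		return -ENOMEM;
 *
 *	rc = snp_msg_init(mdesc, snp_vmpl);
 *	if (rc)
 *		goto out_free;
 *
 *	req.msg_version = MSG_HDR_VER;
 *	req.msg_type    = <request type>;
 *	req.vmpck_id    = snp_vmpl;
 *	req.req_buf     = <request buffer>;
 *	req.req_sz      = <request size>;
 *	req.resp_buf    = <response buffer>;
 *	req.resp_sz     = <response size, including AUTHTAG_LEN>;
 *	req.exit_code   = SVM_VMGEXIT_GUEST_REQUEST;
 *
 *	rc = snp_send_guest_request(mdesc, &req);
 * out_free:
 *	snp_msg_free(mdesc);
 */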