// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2019 SUSE
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"SEV: " fmt

#include <linux/sched/debug.h>	/* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/cc_platform.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/cpumask.h>
#include <linux/efi.h>
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/psp-sev.h>
#include <linux/dmi.h>
#include <uapi/linux/sev-guest.h>
#include <crypto/gcm.h>

#include <asm/init.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
#include <asm/insn-eval.h>
#include <asm/fpu/xcr.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/setup.h>
#include <asm/traps.h>
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/cpuid/api.h>
#include <asm/cmdline.h>
#include <asm/msr.h>

#include "internal.h"

/* Bitmap of SEV features supported by the hypervisor */
u64 sev_hv_features __ro_after_init;
SYM_PIC_ALIAS(sev_hv_features);

/* Secrets page physical address from the CC blob */
u64 sev_secrets_pa __ro_after_init;
SYM_PIC_ALIAS(sev_secrets_pa);

/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
#define AP_INIT_CS_LIMIT		0xffff
#define AP_INIT_DS_LIMIT		0xffff
#define AP_INIT_LDTR_LIMIT		0xffff
#define AP_INIT_GDTR_LIMIT		0xffff
#define AP_INIT_IDTR_LIMIT		0xffff
#define AP_INIT_TR_LIMIT		0xffff
#define AP_INIT_RFLAGS_DEFAULT		0x2
#define AP_INIT_DR6_DEFAULT		0xffff0ff0
#define AP_INIT_GPAT_DEFAULT		0x0007040600070406ULL
#define AP_INIT_XCR0_DEFAULT		0x1
#define AP_INIT_X87_FTW_DEFAULT 0x5555 70 #define AP_INIT_X87_FCW_DEFAULT 0x0040 71 #define AP_INIT_CR0_DEFAULT 0x60000010 72 #define AP_INIT_MXCSR_DEFAULT 0x1f80 73 74 static const char * const sev_status_feat_names[] = { 75 [MSR_AMD64_SEV_ENABLED_BIT] = "SEV", 76 [MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES", 77 [MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP", 78 [MSR_AMD64_SNP_VTOM_BIT] = "vTom", 79 [MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC", 80 [MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI", 81 [MSR_AMD64_SNP_ALT_INJ_BIT] = "AI", 82 [MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap", 83 [MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS", 84 [MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol", 85 [MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS", 86 [MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC", 87 [MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam", 88 [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt", 89 [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt", 90 [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt", 91 [MSR_AMD64_SNP_SECURE_AVIC_BIT] = "SecureAVIC", 92 [MSR_AMD64_SNP_IBPB_ON_ENTRY_BIT] = "IBPBOnEntry", 93 }; 94 95 /* 96 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and 97 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated 98 * across the APs VMSA fields (TSC_SCALE and TSC_OFFSET). 99 */ 100 static u64 snp_tsc_scale __ro_after_init; 101 static u64 snp_tsc_offset __ro_after_init; 102 static unsigned long snp_tsc_freq_khz __ro_after_init; 103 104 DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); 105 DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); 106 107 /* 108 * SVSM related information: 109 * When running under an SVSM, the VMPL that Linux is executing at must be 110 * non-zero. The VMPL is therefore used to indicate the presence of an SVSM. 
111 */ 112 u8 snp_vmpl __ro_after_init; 113 EXPORT_SYMBOL_GPL(snp_vmpl); 114 SYM_PIC_ALIAS(snp_vmpl); 115 116 /* 117 * Since feature negotiation related variables are set early in the boot 118 * process they must reside in the .data section so as not to be zeroed 119 * out when the .bss section is later cleared. 120 * 121 * GHCB protocol version negotiated with the hypervisor. 122 */ 123 u16 ghcb_version __ro_after_init; 124 SYM_PIC_ALIAS(ghcb_version); 125 126 /* For early boot hypervisor communication in SEV-ES enabled guests */ 127 static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); 128 129 /* 130 * Needs to be in the .data section because we need it NULL before bss is 131 * cleared 132 */ 133 struct ghcb *boot_ghcb __section(".data"); 134 135 static u64 __init get_snp_jump_table_addr(void) 136 { 137 struct snp_secrets_page *secrets; 138 void __iomem *mem; 139 u64 addr; 140 141 mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE); 142 if (!mem) { 143 pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n"); 144 return 0; 145 } 146 147 secrets = (__force struct snp_secrets_page *)mem; 148 149 addr = secrets->os_area.ap_jump_table_pa; 150 iounmap(mem); 151 152 return addr; 153 } 154 155 static u64 __init get_jump_table_addr(void) 156 { 157 struct ghcb_state state; 158 unsigned long flags; 159 struct ghcb *ghcb; 160 u64 ret = 0; 161 162 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 163 return get_snp_jump_table_addr(); 164 165 local_irq_save(flags); 166 167 ghcb = __sev_get_ghcb(&state); 168 169 vc_ghcb_invalidate(ghcb); 170 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE); 171 ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE); 172 ghcb_set_sw_exit_info_2(ghcb, 0); 173 174 sev_es_wr_ghcb_msr(__pa(ghcb)); 175 VMGEXIT(); 176 177 if (ghcb_sw_exit_info_1_is_valid(ghcb) && 178 ghcb_sw_exit_info_2_is_valid(ghcb)) 179 ret = ghcb->save.sw_exit_info_2; 180 181 __sev_put_ghcb(&state); 182 183 
local_irq_restore(flags); 184 185 return ret; 186 } 187 188 static void pval_pages(struct snp_psc_desc *desc) 189 { 190 struct psc_entry *e; 191 unsigned long vaddr; 192 unsigned int size; 193 unsigned int i; 194 bool validate; 195 u64 pfn; 196 int rc; 197 198 for (i = 0; i <= desc->hdr.end_entry; i++) { 199 e = &desc->entries[i]; 200 201 pfn = e->gfn; 202 vaddr = (unsigned long)pfn_to_kaddr(pfn); 203 size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; 204 validate = e->operation == SNP_PAGE_STATE_PRIVATE; 205 206 rc = pvalidate(vaddr, size, validate); 207 if (!rc) 208 continue; 209 210 if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) { 211 unsigned long vaddr_end = vaddr + PMD_SIZE; 212 213 for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) { 214 rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); 215 if (rc) 216 __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0); 217 } 218 } else { 219 __pval_terminate(pfn, validate, size, rc, 0); 220 } 221 } 222 } 223 224 static void pvalidate_pages(struct snp_psc_desc *desc) 225 { 226 struct psc_entry *e; 227 unsigned int i; 228 229 if (snp_vmpl) 230 svsm_pval_pages(desc); 231 else 232 pval_pages(desc); 233 234 /* 235 * If not affected by the cache-coherency vulnerability there is no need 236 * to perform the cache eviction mitigation. 237 */ 238 if (cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO)) 239 return; 240 241 for (i = 0; i <= desc->hdr.end_entry; i++) { 242 e = &desc->entries[i]; 243 244 /* 245 * If validating memory (making it private) perform the cache 246 * eviction mitigation. 247 */ 248 if (e->operation == SNP_PAGE_STATE_PRIVATE) 249 sev_evict_cache(pfn_to_kaddr(e->gfn), e->pagesize ? 
512 : 1); 250 } 251 } 252 253 static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) 254 { 255 int cur_entry, end_entry, ret = 0; 256 struct snp_psc_desc *data; 257 struct es_em_ctxt ctxt; 258 259 vc_ghcb_invalidate(ghcb); 260 261 /* Copy the input desc into GHCB shared buffer */ 262 data = (struct snp_psc_desc *)ghcb->shared_buffer; 263 memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc))); 264 265 /* 266 * As per the GHCB specification, the hypervisor can resume the guest 267 * before processing all the entries. Check whether all the entries 268 * are processed. If not, then keep retrying. Note, the hypervisor 269 * will update the data memory directly to indicate the status, so 270 * reference the data->hdr everywhere. 271 * 272 * The strategy here is to wait for the hypervisor to change the page 273 * state in the RMP table before guest accesses the memory pages. If the 274 * page state change was not successful, then later memory access will 275 * result in a crash. 276 */ 277 cur_entry = data->hdr.cur_entry; 278 end_entry = data->hdr.end_entry; 279 280 while (data->hdr.cur_entry <= data->hdr.end_entry) { 281 ghcb_set_sw_scratch(ghcb, (u64)__pa(data)); 282 283 /* This will advance the shared buffer data points to. */ 284 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0); 285 286 /* 287 * Page State Change VMGEXIT can pass error code through 288 * exit_info_2. 289 */ 290 if (WARN(ret || ghcb->save.sw_exit_info_2, 291 "SNP: PSC failed ret=%d exit_info_2=%llx\n", 292 ret, ghcb->save.sw_exit_info_2)) { 293 ret = 1; 294 goto out; 295 } 296 297 /* Verify that reserved bit is not set */ 298 if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) { 299 ret = 1; 300 goto out; 301 } 302 303 /* 304 * Sanity check that entry processing is not going backwards. 305 * This will happen only if hypervisor is tricking us. 
306 */ 307 if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry, 308 "SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n", 309 end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) { 310 ret = 1; 311 goto out; 312 } 313 } 314 315 out: 316 return ret; 317 } 318 319 static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, 320 unsigned long vaddr_end, int op) 321 { 322 struct ghcb_state state; 323 bool use_large_entry; 324 struct psc_hdr *hdr; 325 struct psc_entry *e; 326 unsigned long flags; 327 unsigned long pfn; 328 struct ghcb *ghcb; 329 int i; 330 331 hdr = &data->hdr; 332 e = data->entries; 333 334 memset(data, 0, sizeof(*data)); 335 i = 0; 336 337 while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) { 338 hdr->end_entry = i; 339 340 if (is_vmalloc_addr((void *)vaddr)) { 341 pfn = vmalloc_to_pfn((void *)vaddr); 342 use_large_entry = false; 343 } else { 344 pfn = __pa(vaddr) >> PAGE_SHIFT; 345 use_large_entry = true; 346 } 347 348 e->gfn = pfn; 349 e->operation = op; 350 351 if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) && 352 (vaddr_end - vaddr) >= PMD_SIZE) { 353 e->pagesize = RMP_PG_SIZE_2M; 354 vaddr += PMD_SIZE; 355 } else { 356 e->pagesize = RMP_PG_SIZE_4K; 357 vaddr += PAGE_SIZE; 358 } 359 360 e++; 361 i++; 362 } 363 364 /* Page validation must be rescinded before changing to shared */ 365 if (op == SNP_PAGE_STATE_SHARED) 366 pvalidate_pages(data); 367 368 local_irq_save(flags); 369 370 if (sev_cfg.ghcbs_initialized) 371 ghcb = __sev_get_ghcb(&state); 372 else 373 ghcb = boot_ghcb; 374 375 /* Invoke the hypervisor to perform the page state changes */ 376 if (!ghcb || vmgexit_psc(ghcb, data)) 377 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); 378 379 if (sev_cfg.ghcbs_initialized) 380 __sev_put_ghcb(&state); 381 382 local_irq_restore(flags); 383 384 /* Page validation must be performed after changing to private */ 385 if (op == 
SNP_PAGE_STATE_PRIVATE) 386 pvalidate_pages(data); 387 388 return vaddr; 389 } 390 391 static void set_pages_state(unsigned long vaddr, unsigned long npages, int op) 392 { 393 struct snp_psc_desc desc; 394 unsigned long vaddr_end; 395 396 /* Use the MSR protocol when a GHCB is not available. */ 397 if (!boot_ghcb) { 398 struct psc_desc d = { op, svsm_get_caa(), svsm_get_caa_pa() }; 399 400 return early_set_pages_state(vaddr, __pa(vaddr), npages, &d); 401 } 402 403 vaddr = vaddr & PAGE_MASK; 404 vaddr_end = vaddr + (npages << PAGE_SHIFT); 405 406 while (vaddr < vaddr_end) 407 vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op); 408 } 409 410 void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) 411 { 412 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 413 return; 414 415 set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED); 416 } 417 418 void snp_set_memory_private(unsigned long vaddr, unsigned long npages) 419 { 420 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 421 return; 422 423 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); 424 } 425 426 void snp_accept_memory(phys_addr_t start, phys_addr_t end) 427 { 428 unsigned long vaddr, npages; 429 430 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 431 return; 432 433 vaddr = (unsigned long)__va(start); 434 npages = (end - start) >> PAGE_SHIFT; 435 436 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); 437 } 438 439 static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id) 440 { 441 bool create = event != SVM_VMGEXIT_AP_DESTROY; 442 struct ghcb_state state; 443 unsigned long flags; 444 struct ghcb *ghcb; 445 int ret = 0; 446 447 local_irq_save(flags); 448 449 ghcb = __sev_get_ghcb(&state); 450 451 vc_ghcb_invalidate(ghcb); 452 453 if (create) 454 ghcb_set_rax(ghcb, vmsa->sev_features); 455 456 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); 457 ghcb_set_sw_exit_info_1(ghcb, 458 ((u64)apic_id << 32) | 459 ((u64)snp_vmpl << 16) | 460 event); 461 
ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); 462 463 sev_es_wr_ghcb_msr(__pa(ghcb)); 464 VMGEXIT(); 465 466 if (!ghcb_sw_exit_info_1_is_valid(ghcb) || 467 lower_32_bits(ghcb->save.sw_exit_info_1)) { 468 pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY")); 469 ret = -EINVAL; 470 } 471 472 __sev_put_ghcb(&state); 473 474 local_irq_restore(flags); 475 476 return ret; 477 } 478 479 static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa) 480 { 481 int ret; 482 483 if (snp_vmpl) { 484 struct svsm_call call = {}; 485 unsigned long flags; 486 487 local_irq_save(flags); 488 489 call.caa = this_cpu_read(svsm_caa); 490 call.rcx = __pa(va); 491 492 if (make_vmsa) { 493 /* Protocol 0, Call ID 2 */ 494 call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU); 495 call.rdx = __pa(caa); 496 call.r8 = apic_id; 497 } else { 498 /* Protocol 0, Call ID 3 */ 499 call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU); 500 } 501 502 ret = svsm_perform_call_protocol(&call); 503 504 local_irq_restore(flags); 505 } else { 506 /* 507 * If the kernel runs at VMPL0, it can change the VMSA 508 * bit for a page using the RMPADJUST instruction. 509 * However, for the instruction to succeed it must 510 * target the permissions of a lesser privileged (higher 511 * numbered) VMPL level, so use VMPL1. 
512 */ 513 u64 attrs = 1; 514 515 if (make_vmsa) 516 attrs |= RMPADJUST_VMSA_PAGE_BIT; 517 518 ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); 519 } 520 521 return ret; 522 } 523 524 static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id) 525 { 526 int err; 527 528 err = snp_set_vmsa(vmsa, NULL, apic_id, false); 529 if (err) 530 pr_err("clear VMSA page failed (%u), leaking page\n", err); 531 else 532 free_page((unsigned long)vmsa); 533 } 534 535 static void set_pte_enc(pte_t *kpte, int level, void *va) 536 { 537 struct pte_enc_desc d = { 538 .kpte = kpte, 539 .pte_level = level, 540 .va = va, 541 .encrypt = true 542 }; 543 544 prepare_pte_enc(&d); 545 set_pte_enc_mask(kpte, d.pfn, d.new_pgprot); 546 } 547 548 static void unshare_all_memory(void) 549 { 550 unsigned long addr, end, size, ghcb; 551 struct sev_es_runtime_data *data; 552 unsigned int npages, level; 553 bool skipped_addr; 554 pte_t *pte; 555 int cpu; 556 557 /* Unshare the direct mapping. */ 558 addr = PAGE_OFFSET; 559 end = PAGE_OFFSET + get_max_mapped(); 560 561 while (addr < end) { 562 pte = lookup_address(addr, &level); 563 size = page_level_size(level); 564 npages = size / PAGE_SIZE; 565 skipped_addr = false; 566 567 if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) { 568 addr += size; 569 continue; 570 } 571 572 /* 573 * Ensure that all the per-CPU GHCBs are made private at the 574 * end of the unsharing loop so that the switch to the slower 575 * MSR protocol happens last. 576 */ 577 for_each_possible_cpu(cpu) { 578 data = per_cpu(runtime_data, cpu); 579 ghcb = (unsigned long)&data->ghcb_page; 580 581 /* Handle the case of a huge page containing the GHCB page */ 582 if (addr <= ghcb && ghcb < addr + size) { 583 skipped_addr = true; 584 break; 585 } 586 } 587 588 if (!skipped_addr) { 589 set_pte_enc(pte, level, (void *)addr); 590 snp_set_memory_private(addr, npages); 591 } 592 addr += size; 593 } 594 595 /* Unshare all bss decrypted memory. 
*/ 596 addr = (unsigned long)__start_bss_decrypted; 597 end = (unsigned long)__start_bss_decrypted_unused; 598 npages = (end - addr) >> PAGE_SHIFT; 599 600 for (; addr < end; addr += PAGE_SIZE) { 601 pte = lookup_address(addr, &level); 602 if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) 603 continue; 604 605 set_pte_enc(pte, level, (void *)addr); 606 } 607 addr = (unsigned long)__start_bss_decrypted; 608 snp_set_memory_private(addr, npages); 609 610 __flush_tlb_all(); 611 } 612 613 /* Stop new private<->shared conversions */ 614 void snp_kexec_begin(void) 615 { 616 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 617 return; 618 619 if (!IS_ENABLED(CONFIG_KEXEC_CORE)) 620 return; 621 622 /* 623 * Crash kernel ends up here with interrupts disabled: can't wait for 624 * conversions to finish. 625 * 626 * If race happened, just report and proceed. 627 */ 628 if (!set_memory_enc_stop_conversion()) 629 pr_warn("Failed to stop shared<->private conversions\n"); 630 } 631 632 /* 633 * Shutdown all APs except the one handling kexec/kdump and clearing 634 * the VMSA tag on AP's VMSA pages as they are not being used as 635 * VMSA page anymore. 636 */ 637 static void shutdown_all_aps(void) 638 { 639 struct sev_es_save_area *vmsa; 640 int apic_id, this_cpu, cpu; 641 642 this_cpu = get_cpu(); 643 644 /* 645 * APs are already in HLT loop when enc_kexec_finish() callback 646 * is invoked. 647 */ 648 for_each_present_cpu(cpu) { 649 vmsa = per_cpu(sev_vmsa, cpu); 650 651 /* 652 * The BSP or offlined APs do not have guest allocated VMSA 653 * and there is no need to clear the VMSA tag for this page. 654 */ 655 if (!vmsa) 656 continue; 657 658 /* 659 * Cannot clear the VMSA tag for the currently running vCPU. 660 */ 661 if (this_cpu == cpu) { 662 unsigned long pa; 663 struct page *p; 664 665 pa = __pa(vmsa); 666 /* 667 * Mark the VMSA page of the running vCPU as offline 668 * so that is excluded and not touched by makedumpfile 669 * while generating vmcore during kdump. 
670 */ 671 p = pfn_to_online_page(pa >> PAGE_SHIFT); 672 if (p) 673 __SetPageOffline(p); 674 continue; 675 } 676 677 apic_id = cpuid_to_apicid[cpu]; 678 679 /* 680 * Issue AP destroy to ensure AP gets kicked out of guest mode 681 * to allow using RMPADJUST to remove the VMSA tag on it's 682 * VMSA page. 683 */ 684 vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id); 685 snp_cleanup_vmsa(vmsa, apic_id); 686 } 687 688 put_cpu(); 689 } 690 691 void snp_kexec_finish(void) 692 { 693 struct sev_es_runtime_data *data; 694 unsigned long size, addr; 695 unsigned int level, cpu; 696 struct ghcb *ghcb; 697 pte_t *pte; 698 699 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 700 return; 701 702 if (!IS_ENABLED(CONFIG_KEXEC_CORE)) 703 return; 704 705 shutdown_all_aps(); 706 707 unshare_all_memory(); 708 709 /* 710 * Switch to using the MSR protocol to change per-CPU GHCBs to 711 * private. All the per-CPU GHCBs have been switched back to private, 712 * so can't do any more GHCB calls to the hypervisor beyond this point 713 * until the kexec'ed kernel starts running. 
714 */ 715 boot_ghcb = NULL; 716 sev_cfg.ghcbs_initialized = false; 717 718 for_each_possible_cpu(cpu) { 719 data = per_cpu(runtime_data, cpu); 720 ghcb = &data->ghcb_page; 721 pte = lookup_address((unsigned long)ghcb, &level); 722 size = page_level_size(level); 723 /* Handle the case of a huge page containing the GHCB page */ 724 addr = (unsigned long)ghcb & page_level_mask(level); 725 set_pte_enc(pte, level, (void *)addr); 726 snp_set_memory_private(addr, (size / PAGE_SIZE)); 727 } 728 } 729 730 #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK) 731 #define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK) 732 #define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK) 733 734 #define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2) 735 #define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3) 736 737 static void *snp_alloc_vmsa_page(int cpu) 738 { 739 struct page *p; 740 741 /* 742 * Allocate VMSA page to work around the SNP erratum where the CPU will 743 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB) 744 * collides with the RMP entry of VMSA page. The recommended workaround 745 * is to not use a large page. 746 * 747 * Allocate an 8k page which is also 8k-aligned. 748 */ 749 p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1); 750 if (!p) 751 return NULL; 752 753 split_page(p, 1); 754 755 /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */ 756 __free_page(p); 757 758 return page_address(p + 1); 759 } 760 761 static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned int cpu) 762 { 763 struct sev_es_save_area *cur_vmsa, *vmsa; 764 struct svsm_ca *caa; 765 u8 sipi_vector; 766 int ret; 767 u64 cr4; 768 769 /* 770 * The hypervisor SNP feature support check has happened earlier, just check 771 * the AP_CREATION one here. 
772 */ 773 if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION)) 774 return -EOPNOTSUPP; 775 776 /* 777 * Verify the desired start IP against the known trampoline start IP 778 * to catch any future new trampolines that may be introduced that 779 * would require a new protected guest entry point. 780 */ 781 if (WARN_ONCE(start_ip != real_mode_header->trampoline_start, 782 "Unsupported SNP start_ip: %lx\n", start_ip)) 783 return -EINVAL; 784 785 /* Override start_ip with known protected guest start IP */ 786 start_ip = real_mode_header->sev_es_trampoline_start; 787 cur_vmsa = per_cpu(sev_vmsa, cpu); 788 789 /* 790 * A new VMSA is created each time because there is no guarantee that 791 * the current VMSA is the kernels or that the vCPU is not running. If 792 * an attempt was done to use the current VMSA with a running vCPU, a 793 * #VMEXIT of that vCPU would wipe out all of the settings being done 794 * here. 795 */ 796 vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu); 797 if (!vmsa) 798 return -ENOMEM; 799 800 /* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */ 801 caa = per_cpu(svsm_caa, cpu); 802 803 /* CR4 should maintain the MCE value */ 804 cr4 = native_read_cr4() & X86_CR4_MCE; 805 806 /* Set the CS value based on the start_ip converted to a SIPI vector */ 807 sipi_vector = (start_ip >> 12); 808 vmsa->cs.base = sipi_vector << 12; 809 vmsa->cs.limit = AP_INIT_CS_LIMIT; 810 vmsa->cs.attrib = INIT_CS_ATTRIBS; 811 vmsa->cs.selector = sipi_vector << 8; 812 813 /* Set the RIP value based on start_ip */ 814 vmsa->rip = start_ip & 0xfff; 815 816 /* Set AP INIT defaults as documented in the APM */ 817 vmsa->ds.limit = AP_INIT_DS_LIMIT; 818 vmsa->ds.attrib = INIT_DS_ATTRIBS; 819 vmsa->es = vmsa->ds; 820 vmsa->fs = vmsa->ds; 821 vmsa->gs = vmsa->ds; 822 vmsa->ss = vmsa->ds; 823 824 vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT; 825 vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT; 826 vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS; 827 vmsa->idtr.limit = AP_INIT_IDTR_LIMIT; 828 
vmsa->tr.limit = AP_INIT_TR_LIMIT; 829 vmsa->tr.attrib = INIT_TR_ATTRIBS; 830 831 vmsa->cr4 = cr4; 832 vmsa->cr0 = AP_INIT_CR0_DEFAULT; 833 vmsa->dr7 = DR7_RESET_VALUE; 834 vmsa->dr6 = AP_INIT_DR6_DEFAULT; 835 vmsa->rflags = AP_INIT_RFLAGS_DEFAULT; 836 vmsa->g_pat = AP_INIT_GPAT_DEFAULT; 837 vmsa->xcr0 = AP_INIT_XCR0_DEFAULT; 838 vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT; 839 vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT; 840 vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT; 841 842 if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) 843 vmsa->vintr_ctrl |= V_GIF_MASK | V_NMI_ENABLE_MASK; 844 845 /* SVME must be set. */ 846 vmsa->efer = EFER_SVME; 847 848 /* 849 * Set the SNP-specific fields for this VMSA: 850 * VMPL level 851 * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits) 852 */ 853 vmsa->vmpl = snp_vmpl; 854 vmsa->sev_features = sev_status >> 2; 855 856 /* Populate AP's TSC scale/offset to get accurate TSC values. */ 857 if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) { 858 vmsa->tsc_scale = snp_tsc_scale; 859 vmsa->tsc_offset = snp_tsc_offset; 860 } 861 862 /* Switch the page over to a VMSA page now that it is initialized */ 863 ret = snp_set_vmsa(vmsa, caa, apic_id, true); 864 if (ret) { 865 pr_err("set VMSA page failed (%u)\n", ret); 866 free_page((unsigned long)vmsa); 867 868 return -EINVAL; 869 } 870 871 /* Issue VMGEXIT AP Creation NAE event */ 872 ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id); 873 if (ret) { 874 snp_cleanup_vmsa(vmsa, apic_id); 875 vmsa = NULL; 876 } 877 878 /* Free up any previous VMSA page */ 879 if (cur_vmsa) 880 snp_cleanup_vmsa(cur_vmsa, apic_id); 881 882 /* Record the current VMSA page */ 883 per_cpu(sev_vmsa, cpu) = vmsa; 884 885 return ret; 886 } 887 888 void __init snp_set_wakeup_secondary_cpu(void) 889 { 890 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 891 return; 892 893 /* 894 * Always set this override if SNP is enabled. This makes it the 895 * required method to start APs under SNP. 
If the hypervisor does 896 * not support AP creation, then no APs will be started. 897 */ 898 apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit); 899 } 900 901 int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh) 902 { 903 u16 startup_cs, startup_ip; 904 phys_addr_t jump_table_pa; 905 u64 jump_table_addr; 906 u16 __iomem *jump_table; 907 908 jump_table_addr = get_jump_table_addr(); 909 910 /* On UP guests there is no jump table so this is not a failure */ 911 if (!jump_table_addr) 912 return 0; 913 914 /* Check if AP Jump Table is page-aligned */ 915 if (jump_table_addr & ~PAGE_MASK) 916 return -EINVAL; 917 918 jump_table_pa = jump_table_addr & PAGE_MASK; 919 920 startup_cs = (u16)(rmh->trampoline_start >> 4); 921 startup_ip = (u16)(rmh->sev_es_trampoline_start - 922 rmh->trampoline_start); 923 924 jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE); 925 if (!jump_table) 926 return -EIO; 927 928 writew(startup_ip, &jump_table[0]); 929 writew(startup_cs, &jump_table[1]); 930 931 iounmap(jump_table); 932 933 return 0; 934 } 935 936 /* 937 * This is needed by the OVMF UEFI firmware which will use whatever it finds in 938 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu 939 * runtime GHCBs used by the kernel are also mapped in the EFI page-table. 940 * 941 * When running under SVSM the CA page is needed too, so map it as well. 
942 */ 943 int __init sev_es_efi_map_ghcbs_cas(pgd_t *pgd) 944 { 945 unsigned long address, pflags, pflags_enc; 946 struct sev_es_runtime_data *data; 947 int cpu; 948 u64 pfn; 949 950 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) 951 return 0; 952 953 pflags = _PAGE_NX | _PAGE_RW; 954 pflags_enc = cc_mkenc(pflags); 955 956 for_each_possible_cpu(cpu) { 957 data = per_cpu(runtime_data, cpu); 958 959 address = __pa(&data->ghcb_page); 960 pfn = address >> PAGE_SHIFT; 961 962 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags)) 963 return 1; 964 965 if (snp_vmpl) { 966 address = per_cpu(svsm_caa_pa, cpu); 967 if (!address) 968 return 1; 969 970 pfn = address >> PAGE_SHIFT; 971 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags_enc)) 972 return 1; 973 } 974 } 975 976 return 0; 977 } 978 979 u64 savic_ghcb_msr_read(u32 reg) 980 { 981 u64 msr = APIC_BASE_MSR + (reg >> 4); 982 struct pt_regs regs = { .cx = msr }; 983 struct es_em_ctxt ctxt = { .regs = ®s }; 984 struct ghcb_state state; 985 enum es_result res; 986 struct ghcb *ghcb; 987 988 guard(irqsave)(); 989 990 ghcb = __sev_get_ghcb(&state); 991 vc_ghcb_invalidate(ghcb); 992 993 res = __vc_handle_msr(ghcb, &ctxt, false); 994 if (res != ES_OK) { 995 pr_err("Secure AVIC MSR (0x%llx) read returned error (%d)\n", msr, res); 996 /* MSR read failures are treated as fatal errors */ 997 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); 998 } 999 1000 __sev_put_ghcb(&state); 1001 1002 return regs.ax | regs.dx << 32; 1003 } 1004 1005 void savic_ghcb_msr_write(u32 reg, u64 value) 1006 { 1007 u64 msr = APIC_BASE_MSR + (reg >> 4); 1008 struct pt_regs regs = { 1009 .cx = msr, 1010 .ax = lower_32_bits(value), 1011 .dx = upper_32_bits(value) 1012 }; 1013 struct es_em_ctxt ctxt = { .regs = ®s }; 1014 struct ghcb_state state; 1015 enum es_result res; 1016 struct ghcb *ghcb; 1017 1018 guard(irqsave)(); 1019 1020 ghcb = __sev_get_ghcb(&state); 1021 vc_ghcb_invalidate(ghcb); 1022 1023 res = __vc_handle_msr(ghcb, 
&ctxt, true); 1024 if (res != ES_OK) { 1025 pr_err("Secure AVIC MSR (0x%llx) write returned error (%d)\n", msr, res); 1026 /* MSR writes should never fail. Any failure is fatal error for SNP guest */ 1027 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SAVIC_FAIL); 1028 } 1029 1030 __sev_put_ghcb(&state); 1031 } 1032 1033 enum es_result savic_register_gpa(u64 gpa) 1034 { 1035 struct ghcb_state state; 1036 struct es_em_ctxt ctxt; 1037 enum es_result res; 1038 struct ghcb *ghcb; 1039 1040 guard(irqsave)(); 1041 1042 ghcb = __sev_get_ghcb(&state); 1043 vc_ghcb_invalidate(ghcb); 1044 1045 ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA); 1046 ghcb_set_rbx(ghcb, gpa); 1047 res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC, 1048 SVM_VMGEXIT_SAVIC_REGISTER_GPA, 0); 1049 1050 __sev_put_ghcb(&state); 1051 1052 return res; 1053 } 1054 1055 enum es_result savic_unregister_gpa(u64 *gpa) 1056 { 1057 struct ghcb_state state; 1058 struct es_em_ctxt ctxt; 1059 enum es_result res; 1060 struct ghcb *ghcb; 1061 1062 guard(irqsave)(); 1063 1064 ghcb = __sev_get_ghcb(&state); 1065 vc_ghcb_invalidate(ghcb); 1066 1067 ghcb_set_rax(ghcb, SVM_VMGEXIT_SAVIC_SELF_GPA); 1068 res = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_SAVIC, 1069 SVM_VMGEXIT_SAVIC_UNREGISTER_GPA, 0); 1070 if (gpa && res == ES_OK) 1071 *gpa = ghcb->save.rbx; 1072 1073 __sev_put_ghcb(&state); 1074 1075 return res; 1076 } 1077 1078 static void snp_register_per_cpu_ghcb(void) 1079 { 1080 struct sev_es_runtime_data *data; 1081 struct ghcb *ghcb; 1082 1083 data = this_cpu_read(runtime_data); 1084 ghcb = &data->ghcb_page; 1085 1086 snp_register_ghcb_early(__pa(ghcb)); 1087 } 1088 1089 void setup_ghcb(void) 1090 { 1091 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) 1092 return; 1093 1094 /* 1095 * Check whether the runtime #VC exception handler is active. It uses 1096 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling(). 
1097 * 1098 * If SNP is active, register the per-CPU GHCB page so that the runtime 1099 * exception handler can use it. 1100 */ 1101 if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) { 1102 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1103 snp_register_per_cpu_ghcb(); 1104 1105 sev_cfg.ghcbs_initialized = true; 1106 1107 return; 1108 } 1109 1110 /* 1111 * Make sure the hypervisor talks a supported protocol. 1112 * This gets called only in the BSP boot phase. 1113 */ 1114 if (!sev_es_negotiate_protocol()) 1115 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 1116 1117 /* 1118 * Clear the boot_ghcb. The first exception comes in before the bss 1119 * section is cleared. 1120 */ 1121 memset(&boot_ghcb_page, 0, PAGE_SIZE); 1122 1123 /* Alright - Make the boot-ghcb public */ 1124 boot_ghcb = &boot_ghcb_page; 1125 1126 /* SNP guest requires that GHCB GPA must be registered. */ 1127 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1128 snp_register_ghcb_early(__pa(&boot_ghcb_page)); 1129 } 1130 1131 #ifdef CONFIG_HOTPLUG_CPU 1132 static void sev_es_ap_hlt_loop(void) 1133 { 1134 struct ghcb_state state; 1135 struct ghcb *ghcb; 1136 1137 ghcb = __sev_get_ghcb(&state); 1138 1139 while (true) { 1140 vc_ghcb_invalidate(ghcb); 1141 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP); 1142 ghcb_set_sw_exit_info_1(ghcb, 0); 1143 ghcb_set_sw_exit_info_2(ghcb, 0); 1144 1145 sev_es_wr_ghcb_msr(__pa(ghcb)); 1146 VMGEXIT(); 1147 1148 /* Wakeup signal? */ 1149 if (ghcb_sw_exit_info_2_is_valid(ghcb) && 1150 ghcb->save.sw_exit_info_2) 1151 break; 1152 } 1153 1154 __sev_put_ghcb(&state); 1155 } 1156 1157 /* 1158 * Play_dead handler when running under SEV-ES. This is needed because 1159 * the hypervisor can't deliver an SIPI request to restart the AP. 1160 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the 1161 * hypervisor wakes it up again. 
 */
static void sev_es_play_dead(void)
{
	play_dead_common();

	/* IRQs now disabled */

	sev_es_ap_hlt_loop();

	/*
	 * If we get here, the VCPU was woken up again. Jump to CPU
	 * startup code to get it back online.
	 */
	soft_restart_cpu();
}
#else  /* CONFIG_HOTPLUG_CPU */
#define sev_es_play_dead	native_play_dead
#endif /* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_SMP
/* Install the SEV-ES aware play_dead handler into smp_ops. */
static void __init sev_es_setup_play_dead(void)
{
	smp_ops.play_dead = sev_es_play_dead;
}
#else
static inline void sev_es_setup_play_dead(void) { }
#endif

/*
 * Allocate the SEV-ES per-CPU runtime data (containing the GHCB page)
 * for @cpu and, when running under an SVSM, its Calling Area page.
 */
static void __init alloc_runtime_data(int cpu)
{
	struct sev_es_runtime_data *data;

	data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
	if (!data)
		panic("Can't allocate SEV-ES runtime data");

	per_cpu(runtime_data, cpu) = data;

	if (snp_vmpl) {
		struct svsm_ca *caa;

		/*
		 * Allocate the SVSM CA page if an SVSM is present.
		 * CPU 0 reuses the boot CA page that is already set up.
		 */
		caa = cpu ? memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE)
			  : &boot_svsm_ca_page;

		per_cpu(svsm_caa, cpu) = caa;
		per_cpu(svsm_caa_pa, cpu) = __pa(caa);
	}
}

/*
 * Make @cpu's GHCB page shared with the hypervisor (decrypted) and
 * initialize it to a known-clean state.
 */
static void __init init_ghcb(int cpu)
{
	struct sev_es_runtime_data *data;
	int err;

	data = per_cpu(runtime_data, cpu);

	/* The GHCB must be mapped unencrypted so the hypervisor can read it. */
	err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
					 sizeof(data->ghcb_page));
	if (err)
		panic("Can't map GHCBs unencrypted");

	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));

	data->ghcb_active = false;
	data->backup_ghcb_active = false;
}

/*
 * Switch from the early boot #VC handling to the runtime #VC handler:
 * allocate and initialize per-CPU GHCB pages for all possible CPUs,
 * negotiate HV features for SNP, and install the runtime handler.
 */
void __init sev_es_init_vc_handling(void)
{
	int cpu;

	/* The GHCB page inside the runtime data must be page-aligned. */
	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	if (!sev_es_check_cpu_features())
		panic("SEV-ES CPU Features missing");

	/*
	 * SNP is supported in v2 of the GHCB spec which mandates support for HV
	 * features.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
		sev_hv_features = get_hv_features();

		if (!(sev_hv_features & GHCB_HV_FT_SNP))
			sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
	}

	/* Initialize per-cpu GHCB pages */
	for_each_possible_cpu(cpu) {
		alloc_runtime_data(cpu);
		init_ghcb(cpu);
	}

	/* Running under an SVSM: use the Calling Area for requests. */
	if (snp_vmpl)
		sev_cfg.use_cas = true;

	sev_es_setup_play_dead();

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}

/*
 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
 * enabled, as the alternative (fallback) logic for DMI probing in the legacy
 * ROM region can cause a crash since this region is not pre-validated.
 */
void __init snp_dmi_setup(void)
{
	if (efi_enabled(EFI_CONFIG_TABLES))
		dmi_setup();
}

/* Dump every entry of the SNP CPUID table to the kernel log (debug aid). */
static void dump_cpuid_table(void)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	int i = 0;

	pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
		cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);

	for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
			i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
			fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
	}
}

/*
 * It is useful from an auditing/testing perspective to provide an easy way
 * for the guest owner to know that the CPUID table has been initialized as
 * expected, but that initialization happens too early in boot to print any
 * sort of indicator, and there's not really any other good place to do it,
 * so do it here.
 *
 * If running as an SNP guest, report the current VM privilege level (VMPL).
 */
static int __init report_snp_info(void)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	if (cpuid_table->count) {
		pr_info("Using SNP CPUID table, %d entries present.\n",
			cpuid_table->count);

		if (sev_cfg.debug)
			dump_cpuid_table();
	}

	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		pr_info("SNP running at VMPL%u.\n", snp_vmpl);

	return 0;
}
arch_initcall(report_snp_info);

/*
 * Issue a single SNP guest request VMGEXIT and translate the hypervisor's
 * result into an errno:
 *   0        - success
 *   -EAGAIN  - hypervisor reported BUSY (request throttled)
 *   -ENOSPC  - extended request: certificate buffer too small, required
 *              page count returned in req->input.data_npages
 *   -EIO     - any other failure
 * The raw hypervisor result is preserved in req->exitinfo2 for the caller.
 */
static int snp_issue_guest_request(struct snp_guest_req *req)
{
	struct snp_req_data *input = &req->input;
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret;

	/* Default: report "firmware was never reached" until proven otherwise. */
	req->exitinfo2 = SEV_RET_NO_FW_CALL;

	/*
	 * __sev_get_ghcb() needs to run with IRQs disabled because it is using
	 * a per-CPU GHCB.
	 */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);
	if (!ghcb) {
		ret = -EIO;
		goto e_restore_irq;
	}

	vc_ghcb_invalidate(ghcb);

	/* Extended requests pass the certificate buffer in RAX/RBX. */
	if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
		ghcb_set_rax(ghcb, input->data_gpa);
		ghcb_set_rbx(ghcb, input->data_npages);
	}

	ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa);
	if (ret)
		goto e_put;

	req->exitinfo2 = ghcb->save.sw_exit_info_2;
	switch (req->exitinfo2) {
	case 0:
		break;

	case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
		ret = -EAGAIN;
		break;

	case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
		/* Number of expected pages are returned in RBX */
		if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
			input->data_npages = ghcb_get_rbx(ghcb);
			ret = -ENOSPC;
			break;
		}
		fallthrough;
	default:
		ret = -EIO;
		break;
	}

e_put:
	__sev_put_ghcb(&state);
e_restore_irq:
	local_irq_restore(flags);
	return ret;
}

static struct platform_device sev_guest_device = {
	.name = "sev-guest",
	.id = -1,
};

static struct platform_device tpm_svsm_device = {
	.name = "tpm-svsm",
	.id = -1,
};

/*
 * Register the SNP guest platform devices: the sev-guest device always,
 * and the SVSM vTPM device when the SVSM probe reports one is present.
 */
static int __init snp_init_platform_device(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return -ENODEV;

	if (platform_device_register(&sev_guest_device))
		return -ENODEV;

	if (snp_svsm_vtpm_probe() &&
	    platform_device_register(&tpm_svsm_device))
		return -ENODEV;

	pr_info("SNP guest platform devices initialized.\n");
	return 0;
}
device_initcall(snp_init_platform_device);

/*
 * Print the names of all enabled SEV features (from sev_status) on one
 * log line. Bits without a name in sev_status_feat_names are skipped.
 */
void sev_show_status(void)
{
	int i;

	pr_info("Status: ");
	for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
		if (sev_status & BIT_ULL(i)) {
			if (!sev_status_feat_names[i])
				continue;

			pr_cont("%s ", sev_status_feat_names[i]);
		}
	}
	pr_cont("\n");
}

#ifdef CONFIG_SYSFS
/* sysfs read handler: report the VMPL this guest is running at. */
static ssize_t vmpl_show(struct kobject *kobj,
			 struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", snp_vmpl);
}

static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl);

static struct attribute *vmpl_attrs[] = {
	&vmpl_attr.attr,
	NULL
};

static struct attribute_group sev_attr_group = {
	.attrs = vmpl_attrs,
};

/*
 * Create /sys/devices/system/cpu/sev with the "vmpl" attribute for
 * SNP guests.
 */
static int __init sev_sysfs_init(void)
{
	struct kobject *sev_kobj;
	struct device *dev_root;
	int ret;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return -ENODEV;

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (!dev_root)
		return -ENODEV;

	sev_kobj = kobject_create_and_add("sev", &dev_root->kobj);
	put_device(dev_root);

	if (!sev_kobj)
		return -ENOMEM;

	ret = sysfs_create_group(sev_kobj, &sev_attr_group);
	if (ret)
		kobject_put(sev_kobj);

	return ret;
}
arch_initcall(sev_sysfs_init);
#endif // CONFIG_SYSFS

/*
 * Re-encrypt and free pages previously shared with the hypervisor.
 * If the encryption attribute cannot be restored, the pages are leaked
 * on purpose rather than handed back to the allocator as shared memory.
 */
static void free_shared_pages(void *buf, size_t sz)
{
	unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
	int ret;

	if (!buf)
		return;

	ret = set_memory_encrypted((unsigned long)buf, npages);
	if (ret) {
		WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n");
		return;
	}

	__free_pages(virt_to_page(buf), get_order(sz));
}

/*
 * Allocate pages and mark them shared (decrypted) so the hypervisor can
 * access them. Returns the virtual address or NULL on failure.
 */
static void *alloc_shared_pages(size_t sz)
{
	unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
	struct page *page;
	int ret;

	page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz));
	if (!page)
		return NULL;

	ret = set_memory_decrypted((unsigned long)page_address(page), npages);
	if (ret) {
		pr_err("failed to mark page shared, ret=%d\n", ret);
		__free_pages(page, get_order(sz));
		return NULL;
	}

	return page_address(page);
}

/*
 * Look up the VMPCK key and its message sequence counter for VMPL @id
 * in the secrets page. Returns NULL for an out-of-range id.
 */
static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno)
{
	u8 *key = NULL;

	switch (id) {
	case 0:
		*seqno = &secrets->os_area.msg_seqno_0;
		key = secrets->vmpck0;
		break;
	case 1:
		*seqno = &secrets->os_area.msg_seqno_1;
		key = secrets->vmpck1;
		break;
	case 2:
		*seqno = &secrets->os_area.msg_seqno_2;
		key = secrets->vmpck2;
		break;
	case 3:
		*seqno = &secrets->os_area.msg_seqno_3;
		key = secrets->vmpck3;
		break;
	default:
		break;
	}

	return key;
}

/*
 * Allocate and initialize an AES-GCM context from @key. Returns NULL on
 * allocation or key-expansion failure.
 */
static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen)
{
	struct aesgcm_ctx *ctx;

	ctx = kzalloc_obj(*ctx);
	if (!ctx)
		return NULL;

	if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) {
		pr_err("Crypto context initialization failed\n");
		kfree(ctx);
		return NULL;
	}

	return ctx;
}

/*
 * Bind @mdesc to the VMPCK identified by @vmpck_id (-1 selects the key
 * of the VMPL the guest runs at) and set up its AES-GCM context.
 * Returns 0 on success, -EINVAL for a missing/empty key, -ENOMEM if the
 * crypto context cannot be allocated.
 */
int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id)
{
	/* Adjust the default VMPCK key based on the executing VMPL level */
	if (vmpck_id == -1)
		vmpck_id = snp_vmpl;

	mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno);
	if (!mdesc->vmpck) {
		pr_err("Invalid VMPCK%d communication key\n", vmpck_id);
		return -EINVAL;
	}

	/* Verify that VMPCK is not zero. */
	if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
		pr_err("Empty VMPCK%d communication key\n", vmpck_id);
		return -EINVAL;
	}

	mdesc->vmpck_id = vmpck_id;

	mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN);
	if (!mdesc->ctx)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(snp_msg_init);

/*
 * Allocate a message descriptor for SNP guest requests: map the secrets
 * page and allocate the hypervisor-shared request/response pages.
 * Returns the descriptor or ERR_PTR(-ENOMEM); free with snp_msg_free().
 */
struct snp_msg_desc *snp_msg_alloc(void)
{
	struct snp_msg_desc *mdesc;
	void __iomem *mem;

	BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE);

	mdesc = kzalloc_obj(struct snp_msg_desc);
	if (!mdesc)
		return ERR_PTR(-ENOMEM);

	/* Map the secrets page with the encryption bit set - it is guest-private. */
	mem = ioremap_encrypted(sev_secrets_pa, PAGE_SIZE);
	if (!mem)
		goto e_free_mdesc;

	mdesc->secrets = (__force struct snp_secrets_page *)mem;

	/* Allocate the shared page used for the request and response message. */
	mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg));
	if (!mdesc->request)
		goto e_unmap;

	mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg));
	if (!mdesc->response)
		goto e_free_request;

	return mdesc;

e_free_request:
	free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
e_unmap:
	iounmap(mem);
e_free_mdesc:
	kfree(mdesc);

	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(snp_msg_alloc);

/*
 * Tear down a message descriptor created by snp_msg_alloc(). The
 * descriptor itself is freed with kfree_sensitive() since it holds a
 * copy of key material.
 */
void snp_msg_free(struct snp_msg_desc *mdesc)
{
	if (!mdesc)
		return;

	kfree(mdesc->ctx);
	free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
	free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
	iounmap((__force void __iomem *)mdesc->secrets);

	kfree_sensitive(mdesc);
}
EXPORT_SYMBOL_GPL(snp_msg_free);

/* Mutex to serialize the shared buffer access and command handling. */
static DEFINE_MUTEX(snp_cmd_mutex);

/*
 * If an error is received from the host or AMD Secure Processor (ASP) there
 * are two options. Either retry the exact same encrypted request or discontinue
 * using the VMPCK.
 *
 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to
 * encrypt the requests. The IV for this scheme is the sequence number. GCM
 * cannot tolerate IV reuse.
 *
 * The ASP FW v1.51 only increments the sequence numbers on a successful
 * guest<->ASP back and forth and only accepts messages at its exact sequence
 * number.
 *
 * So if the sequence number were to be reused the encryption scheme is
 * vulnerable. If the sequence number were incremented for a fresh IV the ASP
 * will reject the request.
 */
static void snp_disable_vmpck(struct snp_msg_desc *mdesc)
{
	/* Wipe the key so no further messages can (unsafely) be sent. */
	pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n",
		 mdesc->vmpck_id);
	memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN);
	mdesc->vmpck = NULL;
}

/* Return the sequence number to use for the next message (stored + 1). */
static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
	u64 count;

	lockdep_assert_held(&snp_cmd_mutex);

	/* Read the current message sequence counter from secrets pages */
	count = *mdesc->os_area_msg_seqno;

	return count + 1;
}

/* Return a non-zero on success */
static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
	u64 count = __snp_get_msg_seqno(mdesc);

	/*
	 * The message sequence counter for the SNP guest request is a 64-bit
	 * value but the version 2 of GHCB specification defines a 32-bit storage
	 * for it. If the counter exceeds the 32-bit value then return zero.
	 * The caller should check the return value, but if the caller happens to
	 * not check the value and use it, then the firmware treats zero as an
	 * invalid number and will fail the message request.
	 */
	if (count >= UINT_MAX) {
		pr_err("request message sequence counter overflow\n");
		return 0;
	}

	return count;
}

/* Advance the stored sequence counter past a completed request/response pair. */
static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc)
{
	/*
	 * The counter is also incremented by the PSP, so increment it by 2
	 * and save in secrets page.
	 */
	*mdesc->os_area_msg_seqno += 2;
}

/*
 * Validate the response header against the request (sequence number,
 * message type, version), then authenticate and decrypt the payload
 * into req->resp_buf. Returns 0 on success, -EBADMSG on any mismatch
 * or authentication failure.
 */
static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	struct snp_guest_msg *resp_msg = &mdesc->secret_response;
	struct snp_guest_msg *req_msg = &mdesc->secret_request;
	struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr;
	struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr;
	struct aesgcm_ctx *ctx = mdesc->ctx;
	u8 iv[GCM_AES_IV_SIZE] = {};

	pr_debug("response [seqno %lld type %d version %d sz %d]\n",
		 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version,
		 resp_msg_hdr->msg_sz);

	/* Copy response from shared memory to encrypted memory. */
	memcpy(resp_msg, mdesc->response, sizeof(*resp_msg));

	/* Verify that the sequence counter is incremented by 1 */
	if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1)))
		return -EBADMSG;

	/* Verify response message type and version number. */
	if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) ||
	    resp_msg_hdr->msg_version != req_msg_hdr->msg_version)
		return -EBADMSG;

	/*
	 * If the message size is greater than our buffer length then return
	 * an error.
1738 */ 1739 if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz)) 1740 return -EBADMSG; 1741 1742 /* Decrypt the payload */ 1743 memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno))); 1744 if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz, 1745 &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag)) 1746 return -EBADMSG; 1747 1748 return 0; 1749 } 1750 1751 static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req) 1752 { 1753 struct snp_guest_msg *msg = &mdesc->secret_request; 1754 struct snp_guest_msg_hdr *hdr = &msg->hdr; 1755 struct aesgcm_ctx *ctx = mdesc->ctx; 1756 u8 iv[GCM_AES_IV_SIZE] = {}; 1757 1758 memset(msg, 0, sizeof(*msg)); 1759 1760 hdr->algo = SNP_AEAD_AES_256_GCM; 1761 hdr->hdr_version = MSG_HDR_VER; 1762 hdr->hdr_sz = sizeof(*hdr); 1763 hdr->msg_type = req->msg_type; 1764 hdr->msg_version = req->msg_version; 1765 hdr->msg_seqno = seqno; 1766 hdr->msg_vmpck = req->vmpck_id; 1767 hdr->msg_sz = req->req_sz; 1768 1769 /* Verify the sequence number is non-zero */ 1770 if (!hdr->msg_seqno) 1771 return -ENOSR; 1772 1773 pr_debug("request [seqno %lld type %d version %d sz %d]\n", 1774 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz); 1775 1776 if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload))) 1777 return -EBADMSG; 1778 1779 memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno))); 1780 aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo, 1781 AAD_LEN, iv, hdr->authtag); 1782 1783 return 0; 1784 } 1785 1786 static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req) 1787 { 1788 unsigned long req_start = jiffies; 1789 unsigned int override_npages = 0; 1790 u64 override_err = 0; 1791 int rc; 1792 1793 retry_request: 1794 /* 1795 * Call firmware to process the request. In this function the encrypted 1796 * message enters shared memory with the host. 
So after this call the 1797 * sequence number must be incremented or the VMPCK must be deleted to 1798 * prevent reuse of the IV. 1799 */ 1800 rc = snp_issue_guest_request(req); 1801 switch (rc) { 1802 case -ENOSPC: 1803 /* 1804 * If the extended guest request fails due to having too 1805 * small of a certificate data buffer, retry the same 1806 * guest request without the extended data request in 1807 * order to increment the sequence number and thus avoid 1808 * IV reuse. 1809 */ 1810 override_npages = req->input.data_npages; 1811 req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; 1812 1813 /* 1814 * Override the error to inform callers the given extended 1815 * request buffer size was too small and give the caller the 1816 * required buffer size. 1817 */ 1818 override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN); 1819 1820 /* 1821 * If this call to the firmware succeeds, the sequence number can 1822 * be incremented allowing for continued use of the VMPCK. If 1823 * there is an error reflected in the return value, this value 1824 * is checked further down and the result will be the deletion 1825 * of the VMPCK and the error code being propagated back to the 1826 * user as an ioctl() return code. 1827 */ 1828 goto retry_request; 1829 1830 /* 1831 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been 1832 * throttled. Retry in the driver to avoid returning and reusing the 1833 * message sequence number on a different message. 1834 */ 1835 case -EAGAIN: 1836 if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) { 1837 rc = -ETIMEDOUT; 1838 break; 1839 } 1840 schedule_timeout_killable(SNP_REQ_RETRY_DELAY); 1841 goto retry_request; 1842 } 1843 1844 /* 1845 * Increment the message sequence number. There is no harm in doing 1846 * this now because decryption uses the value stored in the response 1847 * structure and any failure will wipe the VMPCK, preventing further 1848 * use anyway. 
1849 */ 1850 snp_inc_msg_seqno(mdesc); 1851 1852 if (override_err) { 1853 req->exitinfo2 = override_err; 1854 1855 /* 1856 * If an extended guest request was issued and the supplied certificate 1857 * buffer was not large enough, a standard guest request was issued to 1858 * prevent IV reuse. If the standard request was successful, return -EIO 1859 * back to the caller as would have originally been returned. 1860 */ 1861 if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) 1862 rc = -EIO; 1863 } 1864 1865 if (override_npages) 1866 req->input.data_npages = override_npages; 1867 1868 return rc; 1869 } 1870 1871 int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req) 1872 { 1873 u64 seqno; 1874 int rc; 1875 1876 /* 1877 * enc_payload() calls aesgcm_encrypt(), which can potentially offload to HW. 1878 * The offload's DMA SG list of data to encrypt has to be in linear mapping. 1879 */ 1880 if (!virt_addr_valid(req->req_buf) || !virt_addr_valid(req->resp_buf)) { 1881 pr_warn("AES-GSM buffers must be in linear mapping"); 1882 return -EINVAL; 1883 } 1884 1885 guard(mutex)(&snp_cmd_mutex); 1886 1887 /* Check if the VMPCK is not empty */ 1888 if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) { 1889 pr_err_ratelimited("VMPCK is disabled\n"); 1890 return -ENOTTY; 1891 } 1892 1893 /* Get message sequence and verify that its a non-zero */ 1894 seqno = snp_get_msg_seqno(mdesc); 1895 if (!seqno) 1896 return -EIO; 1897 1898 /* Clear shared memory's response for the host to populate. */ 1899 memset(mdesc->response, 0, sizeof(struct snp_guest_msg)); 1900 1901 /* Encrypt the userspace provided payload in mdesc->secret_request. */ 1902 rc = enc_payload(mdesc, seqno, req); 1903 if (rc) 1904 return rc; 1905 1906 /* 1907 * Write the fully encrypted request to the shared unencrypted 1908 * request page. 
 */
	memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request));

	/* Initialize the input address for guest request */
	req->input.req_gpa = __pa(mdesc->request);
	req->input.resp_gpa = __pa(mdesc->response);
	req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;

	rc = __handle_guest_request(mdesc, req);
	if (rc) {
		/*
		 * An undersized extended-request buffer is an expected caller
		 * error - report it without wiping the VMPCK.
		 */
		if (rc == -EIO &&
		    req->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			return rc;

		pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n",
			 rc, req->exitinfo2);

		/* Any other failure may mean the sequence number/IV was burned. */
		snp_disable_vmpck(mdesc);
		return rc;
	}

	rc = verify_and_dec_payload(mdesc, req);
	if (rc) {
		pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc);
		snp_disable_vmpck(mdesc);
		return rc;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(snp_send_guest_request);

/*
 * Fetch TSC_INFO from the ASP via SNP guest messaging and cache the
 * scale/offset in snp_tsc_scale/snp_tsc_offset for later use when
 * setting up AP VMSAs. Returns 0 on success, negative errno on failure.
 */
static int __init snp_get_tsc_info(void)
{
	struct snp_tsc_info_resp *tsc_resp;
	struct snp_tsc_info_req *tsc_req;
	struct snp_msg_desc *mdesc;
	struct snp_guest_req req = {};
	int rc = -ENOMEM;

	tsc_req = kzalloc_obj(*tsc_req);
	if (!tsc_req)
		return rc;

	/*
	 * The intermediate response buffer is used while decrypting the
	 * response payload. Make sure that it has enough space to cover
	 * the authtag.
	 */
	tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL);
	if (!tsc_resp)
		goto e_free_tsc_req;

	mdesc = snp_msg_alloc();
	if (IS_ERR_OR_NULL(mdesc))
		goto e_free_tsc_resp;

	rc = snp_msg_init(mdesc, snp_vmpl);
	if (rc)
		goto e_free_mdesc;

	req.msg_version = MSG_HDR_VER;
	req.msg_type = SNP_MSG_TSC_INFO_REQ;
	req.vmpck_id = snp_vmpl;
	req.req_buf = tsc_req;
	req.req_sz = sizeof(*tsc_req);
	req.resp_buf = (void *)tsc_resp;
	req.resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN;
	req.exit_code = SVM_VMGEXIT_GUEST_REQUEST;

	rc = snp_send_guest_request(mdesc, &req);
	if (rc)
		goto e_request;

	pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n",
		 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset,
		 tsc_resp->tsc_factor);

	if (!tsc_resp->status) {
		snp_tsc_scale = tsc_resp->tsc_scale;
		snp_tsc_offset = tsc_resp->tsc_offset;
	} else {
		pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status);
		rc = -EIO;
	}

e_request:
	/* The response buffer contains sensitive data, explicitly clear it. */
	memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN);
e_free_mdesc:
	snp_msg_free(mdesc);
e_free_tsc_resp:
	kfree(tsc_resp);
e_free_tsc_req:
	kfree(tsc_req);

	return rc;
}

/*
 * Secure TSC is mandatory once enabled: failing to retrieve TSC_INFO
 * terminates the guest rather than running with an untrusted TSC.
 */
void __init snp_secure_tsc_prepare(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	if (snp_get_tsc_info()) {
		pr_alert("Unable to retrieve Secure TSC info from ASP\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
	}

	pr_debug("SecureTSC enabled");
}

/* Calibration callback: the TSC frequency is known, not measured. */
static unsigned long securetsc_get_tsc_khz(void)
{
	return snp_tsc_freq_khz;
}

/*
 * Derive the guest TSC frequency from MSR_AMD64_GUEST_TSC_FREQ and the
 * TSC_FACTOR in the secrets page, and route both CPU and TSC frequency
 * calibration through it.
 */
void __init snp_secure_tsc_init(void)
{
	struct snp_secrets_page *secrets;
	unsigned long tsc_freq_mhz;
	void *mem;

	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	mem = early_memremap_encrypted(sev_secrets_pa, PAGE_SIZE);
	if (!mem) {
		pr_err("Unable to get TSC_FACTOR: failed to map the SNP secrets page.\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
	}

	secrets = (__force struct snp_secrets_page *)mem;

	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);

	/* Extract the GUEST TSC MHZ from BIT[17:0], rest is reserved space */
	tsc_freq_mhz &= GENMASK_ULL(17, 0);

	snp_tsc_freq_khz = SNP_SCALE_TSC_FREQ(tsc_freq_mhz * 1000, secrets->tsc_factor);

	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;

	early_memunmap(mem, PAGE_SIZE);
}