1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * AMD Memory Encryption Support 4 * 5 * Copyright (C) 2019 SUSE 6 * 7 * Author: Joerg Roedel <jroedel@suse.de> 8 */ 9 10 #define pr_fmt(fmt) "SEV: " fmt 11 12 #include <linux/sched/debug.h> /* For show_regs() */ 13 #include <linux/percpu-defs.h> 14 #include <linux/cc_platform.h> 15 #include <linux/printk.h> 16 #include <linux/mm_types.h> 17 #include <linux/set_memory.h> 18 #include <linux/memblock.h> 19 #include <linux/kernel.h> 20 #include <linux/mm.h> 21 #include <linux/cpumask.h> 22 #include <linux/efi.h> 23 #include <linux/platform_device.h> 24 #include <linux/io.h> 25 #include <linux/psp-sev.h> 26 #include <linux/dmi.h> 27 #include <uapi/linux/sev-guest.h> 28 #include <crypto/gcm.h> 29 30 #include <asm/init.h> 31 #include <asm/cpu_entry_area.h> 32 #include <asm/stacktrace.h> 33 #include <asm/sev.h> 34 #include <asm/insn-eval.h> 35 #include <asm/fpu/xcr.h> 36 #include <asm/processor.h> 37 #include <asm/realmode.h> 38 #include <asm/setup.h> 39 #include <asm/traps.h> 40 #include <asm/svm.h> 41 #include <asm/smp.h> 42 #include <asm/cpu.h> 43 #include <asm/apic.h> 44 #include <asm/cpuid.h> 45 #include <asm/cmdline.h> 46 47 #define DR7_RESET_VALUE 0x400 48 49 /* AP INIT values as documented in the APM2 section "Processor Initialization State" */ 50 #define AP_INIT_CS_LIMIT 0xffff 51 #define AP_INIT_DS_LIMIT 0xffff 52 #define AP_INIT_LDTR_LIMIT 0xffff 53 #define AP_INIT_GDTR_LIMIT 0xffff 54 #define AP_INIT_IDTR_LIMIT 0xffff 55 #define AP_INIT_TR_LIMIT 0xffff 56 #define AP_INIT_RFLAGS_DEFAULT 0x2 57 #define AP_INIT_DR6_DEFAULT 0xffff0ff0 58 #define AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL 59 #define AP_INIT_XCR0_DEFAULT 0x1 60 #define AP_INIT_X87_FTW_DEFAULT 0x5555 61 #define AP_INIT_X87_FCW_DEFAULT 0x0040 62 #define AP_INIT_CR0_DEFAULT 0x60000010 63 #define AP_INIT_MXCSR_DEFAULT 0x1f80 64 65 static const char * const sev_status_feat_names[] = { 66 [MSR_AMD64_SEV_ENABLED_BIT] = "SEV", 67 [MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES", 68 [MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP", 69 [MSR_AMD64_SNP_VTOM_BIT] = "vTom", 70 [MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC", 71 [MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI", 72 [MSR_AMD64_SNP_ALT_INJ_BIT] = "AI", 73 [MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap", 74 [MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS", 75 [MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol", 76 [MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS", 77 [MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC", 78 [MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam", 79 [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt", 80 [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt", 81 [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt", 82 }; 83 84 /* For early boot hypervisor communication in SEV-ES enabled guests */ 85 static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); 86 87 /* 88 * Needs to be in the .data section because we need it NULL before bss is 89 * cleared 90 */ 91 static struct ghcb *boot_ghcb __section(".data"); 92 93 /* Bitmap of SEV features supported by the hypervisor */ 94 static u64 sev_hv_features __ro_after_init; 95 96 /* Secrets page physical address from the CC blob */ 97 static u64 secrets_pa __ro_after_init; 98 99 /* 100 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and 101 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated 102 * across the APs VMSA fields (TSC_SCALE and TSC_OFFSET). 
103 */ 104 static u64 snp_tsc_scale __ro_after_init; 105 static u64 snp_tsc_offset __ro_after_init; 106 static u64 snp_tsc_freq_khz __ro_after_init; 107 108 /* #VC handler runtime per-CPU data */ 109 struct sev_es_runtime_data { 110 struct ghcb ghcb_page; 111 112 /* 113 * Reserve one page per CPU as backup storage for the unencrypted GHCB. 114 * It is needed when an NMI happens while the #VC handler uses the real 115 * GHCB, and the NMI handler itself is causing another #VC exception. In 116 * that case the GHCB content of the first handler needs to be backed up 117 * and restored. 118 */ 119 struct ghcb backup_ghcb; 120 121 /* 122 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions. 123 * There is no need for it to be atomic, because nothing is written to 124 * the GHCB between the read and the write of ghcb_active. So it is safe 125 * to use it when a nested #VC exception happens before the write. 126 * 127 * This is necessary for example in the #VC->NMI->#VC case when the NMI 128 * happens while the first #VC handler uses the GHCB. When the NMI code 129 * raises a second #VC handler it might overwrite the contents of the 130 * GHCB written by the first handler. To avoid this the content of the 131 * GHCB is saved and restored when the GHCB is detected to be in use 132 * already. 133 */ 134 bool ghcb_active; 135 bool backup_ghcb_active; 136 137 /* 138 * Cached DR7 value - write it on DR7 writes and return it on reads. 139 * That value will never make it to the real hardware DR7 as debugging 140 * is currently unsupported in SEV-ES guests. 141 */ 142 unsigned long dr7; 143 }; 144 145 struct ghcb_state { 146 struct ghcb *ghcb; 147 }; 148 149 /* For early boot SVSM communication */ 150 static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE); 151 152 static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); 153 static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); 154 static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa); 155 static DEFINE_PER_CPU(u64, svsm_caa_pa); 156 157 static __always_inline bool on_vc_stack(struct pt_regs *regs) 158 { 159 unsigned long sp = regs->sp; 160 161 /* User-mode RSP is not trusted */ 162 if (user_mode(regs)) 163 return false; 164 165 /* SYSCALL gap still has user-mode RSP */ 166 if (ip_within_syscall_gap(regs)) 167 return false; 168 169 return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); 170 } 171 172 /* 173 * This function handles the case when an NMI is raised in the #VC 174 * exception handler entry code, before the #VC handler has switched off 175 * its IST stack. In this case, the IST entry for #VC must be adjusted, 176 * so that any nested #VC exception will not overwrite the stack 177 * contents of the interrupted #VC handler. 178 * 179 * The IST entry is adjusted unconditionally so that it can be also be 180 * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a 181 * nested sev_es_ist_exit() call may adjust back the IST entry too 182 * early. 183 * 184 * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run 185 * on the NMI IST stack, as they are only called from NMI handling code 186 * right now. 
187 */ 188 void noinstr __sev_es_ist_enter(struct pt_regs *regs) 189 { 190 unsigned long old_ist, new_ist; 191 192 /* Read old IST entry */ 193 new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); 194 195 /* 196 * If NMI happened while on the #VC IST stack, set the new IST 197 * value below regs->sp, so that the interrupted stack frame is 198 * not overwritten by subsequent #VC exceptions. 199 */ 200 if (on_vc_stack(regs)) 201 new_ist = regs->sp; 202 203 /* 204 * Reserve additional 8 bytes and store old IST value so this 205 * adjustment can be unrolled in __sev_es_ist_exit(). 206 */ 207 new_ist -= sizeof(old_ist); 208 *(unsigned long *)new_ist = old_ist; 209 210 /* Set new IST entry */ 211 this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist); 212 } 213 214 void noinstr __sev_es_ist_exit(void) 215 { 216 unsigned long ist; 217 218 /* Read IST entry */ 219 ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); 220 221 if (WARN_ON(ist == __this_cpu_ist_top_va(VC))) 222 return; 223 224 /* Read back old IST entry and write it to the TSS */ 225 this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist); 226 } 227 228 /* 229 * Nothing shall interrupt this code path while holding the per-CPU 230 * GHCB. The backup GHCB is only for NMIs interrupting this path. 231 * 232 * Callers must disable local interrupts around it. 233 */ 234 static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) 235 { 236 struct sev_es_runtime_data *data; 237 struct ghcb *ghcb; 238 239 WARN_ON(!irqs_disabled()); 240 241 data = this_cpu_read(runtime_data); 242 ghcb = &data->ghcb_page; 243 244 if (unlikely(data->ghcb_active)) { 245 /* GHCB is already in use - save its contents */ 246 247 if (unlikely(data->backup_ghcb_active)) { 248 /* 249 * Backup-GHCB is also already in use. There is no way 250 * to continue here so just kill the machine. To make 251 * panic() work, mark GHCBs inactive so that messages 252 * can be printed out. 253 */ 254 data->ghcb_active = false; 255 data->backup_ghcb_active = false; 256 257 instrumentation_begin(); 258 panic("Unable to handle #VC exception! 
GHCB and Backup GHCB are already in use"); 259 instrumentation_end(); 260 } 261 262 /* Mark backup_ghcb active before writing to it */ 263 data->backup_ghcb_active = true; 264 265 state->ghcb = &data->backup_ghcb; 266 267 /* Backup GHCB content */ 268 *state->ghcb = *ghcb; 269 } else { 270 state->ghcb = NULL; 271 data->ghcb_active = true; 272 } 273 274 return ghcb; 275 } 276 277 static inline u64 sev_es_rd_ghcb_msr(void) 278 { 279 return __rdmsr(MSR_AMD64_SEV_ES_GHCB); 280 } 281 282 static __always_inline void sev_es_wr_ghcb_msr(u64 val) 283 { 284 u32 low, high; 285 286 low = (u32)(val); 287 high = (u32)(val >> 32); 288 289 native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); 290 } 291 292 static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt, 293 unsigned char *buffer) 294 { 295 return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE); 296 } 297 298 static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt) 299 { 300 char buffer[MAX_INSN_SIZE]; 301 int insn_bytes; 302 303 insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer); 304 if (insn_bytes == 0) { 305 /* Nothing could be copied */ 306 ctxt->fi.vector = X86_TRAP_PF; 307 ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; 308 ctxt->fi.cr2 = ctxt->regs->ip; 309 return ES_EXCEPTION; 310 } else if (insn_bytes == -EINVAL) { 311 /* Effective RIP could not be calculated */ 312 ctxt->fi.vector = X86_TRAP_GP; 313 ctxt->fi.error_code = 0; 314 ctxt->fi.cr2 = 0; 315 return ES_EXCEPTION; 316 } 317 318 if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes)) 319 return ES_DECODE_FAILED; 320 321 if (ctxt->insn.immediate.got) 322 return ES_OK; 323 else 324 return ES_DECODE_FAILED; 325 } 326 327 static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt) 328 { 329 char buffer[MAX_INSN_SIZE]; 330 int res, ret; 331 332 res = vc_fetch_insn_kernel(ctxt, buffer); 333 if (res) { 334 ctxt->fi.vector = X86_TRAP_PF; 335 ctxt->fi.error_code = X86_PF_INSTR; 336 ctxt->fi.cr2 = ctxt->regs->ip; 337 return ES_EXCEPTION; 338 } 339 340 ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64); 341 if (ret < 0) 342 return ES_DECODE_FAILED; 343 else 344 return ES_OK; 345 } 346 347 static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) 348 { 349 if (user_mode(ctxt->regs)) 350 return __vc_decode_user_insn(ctxt); 351 else 352 return __vc_decode_kern_insn(ctxt); 353 } 354 355 static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, 356 char *dst, char *buf, size_t size) 357 { 358 unsigned long error_code = X86_PF_PROT | X86_PF_WRITE; 359 360 /* 361 * This function uses __put_user() independent of whether kernel or user 362 * memory is accessed. This works fine because __put_user() does no 363 * sanity checks of the pointer being accessed. All that it does is 364 * to report when the access failed. 365 * 366 * Also, this function runs in atomic context, so __put_user() is not 367 * allowed to sleep. The page-fault handler detects that it is running 368 * in atomic context and will not try to take mmap_sem and handle the 369 * fault, so additional pagefault_enable()/disable() calls are not 370 * needed. 371 * 372 * The access can't be done via copy_to_user() here because 373 * vc_write_mem() must not use string instructions to access unsafe 374 * memory. The reason is that MOVS is emulated by the #VC handler by 375 * splitting the move up into a read and a write and taking a nested #VC 376 * exception on whatever of them is the MMIO access. 
Using string 377 * instructions here would cause infinite nesting. 378 */ 379 switch (size) { 380 case 1: { 381 u8 d1; 382 u8 __user *target = (u8 __user *)dst; 383 384 memcpy(&d1, buf, 1); 385 if (__put_user(d1, target)) 386 goto fault; 387 break; 388 } 389 case 2: { 390 u16 d2; 391 u16 __user *target = (u16 __user *)dst; 392 393 memcpy(&d2, buf, 2); 394 if (__put_user(d2, target)) 395 goto fault; 396 break; 397 } 398 case 4: { 399 u32 d4; 400 u32 __user *target = (u32 __user *)dst; 401 402 memcpy(&d4, buf, 4); 403 if (__put_user(d4, target)) 404 goto fault; 405 break; 406 } 407 case 8: { 408 u64 d8; 409 u64 __user *target = (u64 __user *)dst; 410 411 memcpy(&d8, buf, 8); 412 if (__put_user(d8, target)) 413 goto fault; 414 break; 415 } 416 default: 417 WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); 418 return ES_UNSUPPORTED; 419 } 420 421 return ES_OK; 422 423 fault: 424 if (user_mode(ctxt->regs)) 425 error_code |= X86_PF_USER; 426 427 ctxt->fi.vector = X86_TRAP_PF; 428 ctxt->fi.error_code = error_code; 429 ctxt->fi.cr2 = (unsigned long)dst; 430 431 return ES_EXCEPTION; 432 } 433 434 static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, 435 char *src, char *buf, size_t size) 436 { 437 unsigned long error_code = X86_PF_PROT; 438 439 /* 440 * This function uses __get_user() independent of whether kernel or user 441 * memory is accessed. This works fine because __get_user() does no 442 * sanity checks of the pointer being accessed. All that it does is 443 * to report when the access failed. 444 * 445 * Also, this function runs in atomic context, so __get_user() is not 446 * allowed to sleep. The page-fault handler detects that it is running 447 * in atomic context and will not try to take mmap_sem and handle the 448 * fault, so additional pagefault_enable()/disable() calls are not 449 * needed. 450 * 451 * The access can't be done via copy_from_user() here because 452 * vc_read_mem() must not use string instructions to access unsafe 453 * memory. The reason is that MOVS is emulated by the #VC handler by 454 * splitting the move up into a read and a write and taking a nested #VC 455 * exception on whatever of them is the MMIO access. Using string 456 * instructions here would cause infinite nesting. 
457 */ 458 switch (size) { 459 case 1: { 460 u8 d1; 461 u8 __user *s = (u8 __user *)src; 462 463 if (__get_user(d1, s)) 464 goto fault; 465 memcpy(buf, &d1, 1); 466 break; 467 } 468 case 2: { 469 u16 d2; 470 u16 __user *s = (u16 __user *)src; 471 472 if (__get_user(d2, s)) 473 goto fault; 474 memcpy(buf, &d2, 2); 475 break; 476 } 477 case 4: { 478 u32 d4; 479 u32 __user *s = (u32 __user *)src; 480 481 if (__get_user(d4, s)) 482 goto fault; 483 memcpy(buf, &d4, 4); 484 break; 485 } 486 case 8: { 487 u64 d8; 488 u64 __user *s = (u64 __user *)src; 489 if (__get_user(d8, s)) 490 goto fault; 491 memcpy(buf, &d8, 8); 492 break; 493 } 494 default: 495 WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); 496 return ES_UNSUPPORTED; 497 } 498 499 return ES_OK; 500 501 fault: 502 if (user_mode(ctxt->regs)) 503 error_code |= X86_PF_USER; 504 505 ctxt->fi.vector = X86_TRAP_PF; 506 ctxt->fi.error_code = error_code; 507 ctxt->fi.cr2 = (unsigned long)src; 508 509 return ES_EXCEPTION; 510 } 511 512 static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, 513 unsigned long vaddr, phys_addr_t *paddr) 514 { 515 unsigned long va = (unsigned long)vaddr; 516 unsigned int level; 517 phys_addr_t pa; 518 pgd_t *pgd; 519 pte_t *pte; 520 521 pgd = __va(read_cr3_pa()); 522 pgd = &pgd[pgd_index(va)]; 523 pte = lookup_address_in_pgd(pgd, va, &level); 524 if (!pte) { 525 ctxt->fi.vector = X86_TRAP_PF; 526 ctxt->fi.cr2 = vaddr; 527 ctxt->fi.error_code = 0; 528 529 if (user_mode(ctxt->regs)) 530 ctxt->fi.error_code |= X86_PF_USER; 531 532 return ES_EXCEPTION; 533 } 534 535 if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC)) 536 /* Emulated MMIO to/from encrypted memory not supported */ 537 return ES_UNSUPPORTED; 538 539 pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; 540 pa |= va & ~page_level_mask(level); 541 542 *paddr = pa; 543 544 return ES_OK; 545 } 546 547 static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) 548 { 549 BUG_ON(size > 4); 550 551 if (user_mode(ctxt->regs)) { 552 struct thread_struct *t = ¤t->thread; 553 struct io_bitmap *iobm = t->io_bitmap; 554 size_t idx; 555 556 if (!iobm) 557 goto fault; 558 559 for (idx = port; idx < port + size; ++idx) { 560 if (test_bit(idx, iobm->bitmap)) 561 goto fault; 562 } 563 } 564 565 return ES_OK; 566 567 fault: 568 ctxt->fi.vector = X86_TRAP_GP; 569 ctxt->fi.error_code = 0; 570 571 return ES_EXCEPTION; 572 } 573 574 static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) 575 { 576 long error_code = ctxt->fi.error_code; 577 int trapnr = ctxt->fi.vector; 578 579 ctxt->regs->orig_ax = ctxt->fi.error_code; 580 581 switch (trapnr) { 582 case X86_TRAP_GP: 583 exc_general_protection(ctxt->regs, error_code); 584 break; 585 case X86_TRAP_UD: 586 exc_invalid_op(ctxt->regs); 587 break; 588 case X86_TRAP_PF: 589 write_cr2(ctxt->fi.cr2); 590 exc_page_fault(ctxt->regs, error_code); 591 break; 592 case X86_TRAP_AC: 593 exc_alignment_check(ctxt->regs, error_code); 594 break; 595 default: 596 pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n"); 597 BUG(); 598 } 599 } 600 601 /* Include code shared with pre-decompression boot stage */ 602 #include "shared.c" 603 604 static inline struct svsm_ca *svsm_get_caa(void) 605 { 606 /* 607 * Use rIP-relative references when called early in the boot. If 608 * ->use_cas is set, then it is late in the boot and no need 609 * to worry about rIP-relative references. 
610 */ 611 if (RIP_REL_REF(sev_cfg).use_cas) 612 return this_cpu_read(svsm_caa); 613 else 614 return RIP_REL_REF(boot_svsm_caa); 615 } 616 617 static u64 svsm_get_caa_pa(void) 618 { 619 /* 620 * Use rIP-relative references when called early in the boot. If 621 * ->use_cas is set, then it is late in the boot and no need 622 * to worry about rIP-relative references. 623 */ 624 if (RIP_REL_REF(sev_cfg).use_cas) 625 return this_cpu_read(svsm_caa_pa); 626 else 627 return RIP_REL_REF(boot_svsm_caa_pa); 628 } 629 630 static noinstr void __sev_put_ghcb(struct ghcb_state *state) 631 { 632 struct sev_es_runtime_data *data; 633 struct ghcb *ghcb; 634 635 WARN_ON(!irqs_disabled()); 636 637 data = this_cpu_read(runtime_data); 638 ghcb = &data->ghcb_page; 639 640 if (state->ghcb) { 641 /* Restore GHCB from Backup */ 642 *ghcb = *state->ghcb; 643 data->backup_ghcb_active = false; 644 state->ghcb = NULL; 645 } else { 646 /* 647 * Invalidate the GHCB so a VMGEXIT instruction issued 648 * from userspace won't appear to be valid. 649 */ 650 vc_ghcb_invalidate(ghcb); 651 data->ghcb_active = false; 652 } 653 } 654 655 static int svsm_perform_call_protocol(struct svsm_call *call) 656 { 657 struct ghcb_state state; 658 unsigned long flags; 659 struct ghcb *ghcb; 660 int ret; 661 662 /* 663 * This can be called very early in the boot, use native functions in 664 * order to avoid paravirt issues. 665 */ 666 flags = native_local_irq_save(); 667 668 /* 669 * Use rip-relative references when called early in the boot. If 670 * ghcbs_initialized is set, then it is late in the boot and no need 671 * to worry about rip-relative references in called functions. 672 */ 673 if (RIP_REL_REF(sev_cfg).ghcbs_initialized) 674 ghcb = __sev_get_ghcb(&state); 675 else if (RIP_REL_REF(boot_ghcb)) 676 ghcb = RIP_REL_REF(boot_ghcb); 677 else 678 ghcb = NULL; 679 680 do { 681 ret = ghcb ? 
svsm_perform_ghcb_protocol(ghcb, call) 682 : svsm_perform_msr_protocol(call); 683 } while (ret == -EAGAIN); 684 685 if (RIP_REL_REF(sev_cfg).ghcbs_initialized) 686 __sev_put_ghcb(&state); 687 688 native_local_irq_restore(flags); 689 690 return ret; 691 } 692 693 void noinstr __sev_es_nmi_complete(void) 694 { 695 struct ghcb_state state; 696 struct ghcb *ghcb; 697 698 ghcb = __sev_get_ghcb(&state); 699 700 vc_ghcb_invalidate(ghcb); 701 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE); 702 ghcb_set_sw_exit_info_1(ghcb, 0); 703 ghcb_set_sw_exit_info_2(ghcb, 0); 704 705 sev_es_wr_ghcb_msr(__pa_nodebug(ghcb)); 706 VMGEXIT(); 707 708 __sev_put_ghcb(&state); 709 } 710 711 static u64 __init get_snp_jump_table_addr(void) 712 { 713 struct snp_secrets_page *secrets; 714 void __iomem *mem; 715 u64 addr; 716 717 mem = ioremap_encrypted(secrets_pa, PAGE_SIZE); 718 if (!mem) { 719 pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n"); 720 return 0; 721 } 722 723 secrets = (__force struct snp_secrets_page *)mem; 724 725 addr = secrets->os_area.ap_jump_table_pa; 726 iounmap(mem); 727 728 return addr; 729 } 730 731 static u64 __init get_jump_table_addr(void) 732 { 733 struct ghcb_state state; 734 unsigned long flags; 735 struct ghcb *ghcb; 736 u64 ret = 0; 737 738 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 739 return get_snp_jump_table_addr(); 740 741 local_irq_save(flags); 742 743 ghcb = __sev_get_ghcb(&state); 744 745 vc_ghcb_invalidate(ghcb); 746 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE); 747 ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE); 748 ghcb_set_sw_exit_info_2(ghcb, 0); 749 750 sev_es_wr_ghcb_msr(__pa(ghcb)); 751 VMGEXIT(); 752 753 if (ghcb_sw_exit_info_1_is_valid(ghcb) && 754 ghcb_sw_exit_info_2_is_valid(ghcb)) 755 ret = ghcb->save.sw_exit_info_2; 756 757 __sev_put_ghcb(&state); 758 759 local_irq_restore(flags); 760 761 return ret; 762 } 763 764 static void __head 765 early_set_pages_state(unsigned long vaddr, unsigned long paddr, 766 unsigned long npages, enum psc_op op) 767 { 768 unsigned long paddr_end; 769 u64 val; 770 771 vaddr = vaddr & PAGE_MASK; 772 773 paddr = paddr & PAGE_MASK; 774 paddr_end = paddr + (npages << PAGE_SHIFT); 775 776 while (paddr < paddr_end) { 777 /* Page validation must be rescinded before changing to shared */ 778 if (op == SNP_PAGE_STATE_SHARED) 779 pvalidate_4k_page(vaddr, paddr, false); 780 781 /* 782 * Use the MSR protocol because this function can be called before 783 * the GHCB is established. 784 */ 785 sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); 786 VMGEXIT(); 787 788 val = sev_es_rd_ghcb_msr(); 789 790 if (WARN(GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP, 791 "Wrong PSC response code: 0x%x\n", 792 (unsigned int)GHCB_RESP_CODE(val))) 793 goto e_term; 794 795 if (WARN(GHCB_MSR_PSC_RESP_VAL(val), 796 "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n", 797 op == SNP_PAGE_STATE_PRIVATE ? 
"private" : "shared", 798 paddr, GHCB_MSR_PSC_RESP_VAL(val))) 799 goto e_term; 800 801 /* Page validation must be performed after changing to private */ 802 if (op == SNP_PAGE_STATE_PRIVATE) 803 pvalidate_4k_page(vaddr, paddr, true); 804 805 vaddr += PAGE_SIZE; 806 paddr += PAGE_SIZE; 807 } 808 809 return; 810 811 e_term: 812 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); 813 } 814 815 void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, 816 unsigned long npages) 817 { 818 /* 819 * This can be invoked in early boot while running identity mapped, so 820 * use an open coded check for SNP instead of using cc_platform_has(). 821 * This eliminates worries about jump tables or checking boot_cpu_data 822 * in the cc_platform_has() function. 823 */ 824 if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) 825 return; 826 827 /* 828 * Ask the hypervisor to mark the memory pages as private in the RMP 829 * table. 830 */ 831 early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE); 832 } 833 834 void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, 835 unsigned long npages) 836 { 837 /* 838 * This can be invoked in early boot while running identity mapped, so 839 * use an open coded check for SNP instead of using cc_platform_has(). 840 * This eliminates worries about jump tables or checking boot_cpu_data 841 * in the cc_platform_has() function. 842 */ 843 if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) 844 return; 845 846 /* Ask hypervisor to mark the memory pages shared in the RMP table. */ 847 early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED); 848 } 849 850 static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, 851 unsigned long vaddr_end, int op) 852 { 853 struct ghcb_state state; 854 bool use_large_entry; 855 struct psc_hdr *hdr; 856 struct psc_entry *e; 857 unsigned long flags; 858 unsigned long pfn; 859 struct ghcb *ghcb; 860 int i; 861 862 hdr = &data->hdr; 863 e = data->entries; 864 865 memset(data, 0, sizeof(*data)); 866 i = 0; 867 868 while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) { 869 hdr->end_entry = i; 870 871 if (is_vmalloc_addr((void *)vaddr)) { 872 pfn = vmalloc_to_pfn((void *)vaddr); 873 use_large_entry = false; 874 } else { 875 pfn = __pa(vaddr) >> PAGE_SHIFT; 876 use_large_entry = true; 877 } 878 879 e->gfn = pfn; 880 e->operation = op; 881 882 if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) && 883 (vaddr_end - vaddr) >= PMD_SIZE) { 884 e->pagesize = RMP_PG_SIZE_2M; 885 vaddr += PMD_SIZE; 886 } else { 887 e->pagesize = RMP_PG_SIZE_4K; 888 vaddr += PAGE_SIZE; 889 } 890 891 e++; 892 i++; 893 } 894 895 /* Page validation must be rescinded before changing to shared */ 896 if (op == SNP_PAGE_STATE_SHARED) 897 pvalidate_pages(data); 898 899 local_irq_save(flags); 900 901 if (sev_cfg.ghcbs_initialized) 902 ghcb = __sev_get_ghcb(&state); 903 else 904 ghcb = boot_ghcb; 905 906 /* Invoke the hypervisor to perform the page state changes */ 907 if (!ghcb || vmgexit_psc(ghcb, data)) 908 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); 909 910 if (sev_cfg.ghcbs_initialized) 911 __sev_put_ghcb(&state); 912 913 local_irq_restore(flags); 914 915 /* Page validation must be performed after changing to private */ 916 if (op == SNP_PAGE_STATE_PRIVATE) 917 pvalidate_pages(data); 918 919 return vaddr; 920 } 921 922 static void set_pages_state(unsigned long vaddr, unsigned long npages, int op) 923 { 924 struct snp_psc_desc desc; 925 unsigned long 
vaddr_end; 926 927 /* Use the MSR protocol when a GHCB is not available. */ 928 if (!boot_ghcb) 929 return early_set_pages_state(vaddr, __pa(vaddr), npages, op); 930 931 vaddr = vaddr & PAGE_MASK; 932 vaddr_end = vaddr + (npages << PAGE_SHIFT); 933 934 while (vaddr < vaddr_end) 935 vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op); 936 } 937 938 void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) 939 { 940 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 941 return; 942 943 set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED); 944 } 945 946 void snp_set_memory_private(unsigned long vaddr, unsigned long npages) 947 { 948 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 949 return; 950 951 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); 952 } 953 954 void snp_accept_memory(phys_addr_t start, phys_addr_t end) 955 { 956 unsigned long vaddr, npages; 957 958 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 959 return; 960 961 vaddr = (unsigned long)__va(start); 962 npages = (end - start) >> PAGE_SHIFT; 963 964 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); 965 } 966 967 static void set_pte_enc(pte_t *kpte, int level, void *va) 968 { 969 struct pte_enc_desc d = { 970 .kpte = kpte, 971 .pte_level = level, 972 .va = va, 973 .encrypt = true 974 }; 975 976 prepare_pte_enc(&d); 977 set_pte_enc_mask(kpte, d.pfn, d.new_pgprot); 978 } 979 980 static void unshare_all_memory(void) 981 { 982 unsigned long addr, end, size, ghcb; 983 struct sev_es_runtime_data *data; 984 unsigned int npages, level; 985 bool skipped_addr; 986 pte_t *pte; 987 int cpu; 988 989 /* Unshare the direct mapping. */ 990 addr = PAGE_OFFSET; 991 end = PAGE_OFFSET + get_max_mapped(); 992 993 while (addr < end) { 994 pte = lookup_address(addr, &level); 995 size = page_level_size(level); 996 npages = size / PAGE_SIZE; 997 skipped_addr = false; 998 999 if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) { 1000 addr += size; 1001 continue; 1002 } 1003 1004 /* 1005 * Ensure that all the per-CPU GHCBs are made private at the 1006 * end of the unsharing loop so that the switch to the slower 1007 * MSR protocol happens last. 1008 */ 1009 for_each_possible_cpu(cpu) { 1010 data = per_cpu(runtime_data, cpu); 1011 ghcb = (unsigned long)&data->ghcb_page; 1012 1013 if (addr <= ghcb && ghcb <= addr + size) { 1014 skipped_addr = true; 1015 break; 1016 } 1017 } 1018 1019 if (!skipped_addr) { 1020 set_pte_enc(pte, level, (void *)addr); 1021 snp_set_memory_private(addr, npages); 1022 } 1023 addr += size; 1024 } 1025 1026 /* Unshare all bss decrypted memory. */ 1027 addr = (unsigned long)__start_bss_decrypted; 1028 end = (unsigned long)__start_bss_decrypted_unused; 1029 npages = (end - addr) >> PAGE_SHIFT; 1030 1031 for (; addr < end; addr += PAGE_SIZE) { 1032 pte = lookup_address(addr, &level); 1033 if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) 1034 continue; 1035 1036 set_pte_enc(pte, level, (void *)addr); 1037 } 1038 addr = (unsigned long)__start_bss_decrypted; 1039 snp_set_memory_private(addr, npages); 1040 1041 __flush_tlb_all(); 1042 } 1043 1044 /* Stop new private<->shared conversions */ 1045 void snp_kexec_begin(void) 1046 { 1047 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1048 return; 1049 1050 if (!IS_ENABLED(CONFIG_KEXEC_CORE)) 1051 return; 1052 1053 /* 1054 * Crash kernel ends up here with interrupts disabled: can't wait for 1055 * conversions to finish. 1056 * 1057 * If race happened, just report and proceed. 
1058 */ 1059 if (!set_memory_enc_stop_conversion()) 1060 pr_warn("Failed to stop shared<->private conversions\n"); 1061 } 1062 1063 void snp_kexec_finish(void) 1064 { 1065 struct sev_es_runtime_data *data; 1066 unsigned int level, cpu; 1067 unsigned long size; 1068 struct ghcb *ghcb; 1069 pte_t *pte; 1070 1071 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1072 return; 1073 1074 if (!IS_ENABLED(CONFIG_KEXEC_CORE)) 1075 return; 1076 1077 unshare_all_memory(); 1078 1079 /* 1080 * Switch to using the MSR protocol to change per-CPU GHCBs to 1081 * private. All the per-CPU GHCBs have been switched back to private, 1082 * so can't do any more GHCB calls to the hypervisor beyond this point 1083 * until the kexec'ed kernel starts running. 1084 */ 1085 boot_ghcb = NULL; 1086 sev_cfg.ghcbs_initialized = false; 1087 1088 for_each_possible_cpu(cpu) { 1089 data = per_cpu(runtime_data, cpu); 1090 ghcb = &data->ghcb_page; 1091 pte = lookup_address((unsigned long)ghcb, &level); 1092 size = page_level_size(level); 1093 set_pte_enc(pte, level, (void *)ghcb); 1094 snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE)); 1095 } 1096 } 1097 1098 static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa) 1099 { 1100 int ret; 1101 1102 if (snp_vmpl) { 1103 struct svsm_call call = {}; 1104 unsigned long flags; 1105 1106 local_irq_save(flags); 1107 1108 call.caa = this_cpu_read(svsm_caa); 1109 call.rcx = __pa(va); 1110 1111 if (make_vmsa) { 1112 /* Protocol 0, Call ID 2 */ 1113 call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU); 1114 call.rdx = __pa(caa); 1115 call.r8 = apic_id; 1116 } else { 1117 /* Protocol 0, Call ID 3 */ 1118 call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU); 1119 } 1120 1121 ret = svsm_perform_call_protocol(&call); 1122 1123 local_irq_restore(flags); 1124 } else { 1125 /* 1126 * If the kernel runs at VMPL0, it can change the VMSA 1127 * bit for a page using the RMPADJUST instruction. 1128 * However, for the instruction to succeed it must 1129 * target the permissions of a lesser privileged (higher 1130 * numbered) VMPL level, so use VMPL1. 1131 */ 1132 u64 attrs = 1; 1133 1134 if (make_vmsa) 1135 attrs |= RMPADJUST_VMSA_PAGE_BIT; 1136 1137 ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); 1138 } 1139 1140 return ret; 1141 } 1142 1143 #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK) 1144 #define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK) 1145 #define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK) 1146 1147 #define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2) 1148 #define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3) 1149 1150 static void *snp_alloc_vmsa_page(int cpu) 1151 { 1152 struct page *p; 1153 1154 /* 1155 * Allocate VMSA page to work around the SNP erratum where the CPU will 1156 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB) 1157 * collides with the RMP entry of VMSA page. The recommended workaround 1158 * is to not use a large page. 1159 * 1160 * Allocate an 8k page which is also 8k-aligned. 1161 */ 1162 p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1); 1163 if (!p) 1164 return NULL; 1165 1166 split_page(p, 1); 1167 1168 /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. 
*/ 1169 __free_page(p); 1170 1171 return page_address(p + 1); 1172 } 1173 1174 static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id) 1175 { 1176 int err; 1177 1178 err = snp_set_vmsa(vmsa, NULL, apic_id, false); 1179 if (err) 1180 pr_err("clear VMSA page failed (%u), leaking page\n", err); 1181 else 1182 free_page((unsigned long)vmsa); 1183 } 1184 1185 static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) 1186 { 1187 struct sev_es_save_area *cur_vmsa, *vmsa; 1188 struct ghcb_state state; 1189 struct svsm_ca *caa; 1190 unsigned long flags; 1191 struct ghcb *ghcb; 1192 u8 sipi_vector; 1193 int cpu, ret; 1194 u64 cr4; 1195 1196 /* 1197 * The hypervisor SNP feature support check has happened earlier, just check 1198 * the AP_CREATION one here. 1199 */ 1200 if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION)) 1201 return -EOPNOTSUPP; 1202 1203 /* 1204 * Verify the desired start IP against the known trampoline start IP 1205 * to catch any future new trampolines that may be introduced that 1206 * would require a new protected guest entry point. 1207 */ 1208 if (WARN_ONCE(start_ip != real_mode_header->trampoline_start, 1209 "Unsupported SNP start_ip: %lx\n", start_ip)) 1210 return -EINVAL; 1211 1212 /* Override start_ip with known protected guest start IP */ 1213 start_ip = real_mode_header->sev_es_trampoline_start; 1214 1215 /* Find the logical CPU for the APIC ID */ 1216 for_each_present_cpu(cpu) { 1217 if (arch_match_cpu_phys_id(cpu, apic_id)) 1218 break; 1219 } 1220 if (cpu >= nr_cpu_ids) 1221 return -EINVAL; 1222 1223 cur_vmsa = per_cpu(sev_vmsa, cpu); 1224 1225 /* 1226 * A new VMSA is created each time because there is no guarantee that 1227 * the current VMSA is the kernels or that the vCPU is not running. If 1228 * an attempt was done to use the current VMSA with a running vCPU, a 1229 * #VMEXIT of that vCPU would wipe out all of the settings being done 1230 * here. 1231 */ 1232 vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu); 1233 if (!vmsa) 1234 return -ENOMEM; 1235 1236 /* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */ 1237 caa = per_cpu(svsm_caa, cpu); 1238 1239 /* CR4 should maintain the MCE value */ 1240 cr4 = native_read_cr4() & X86_CR4_MCE; 1241 1242 /* Set the CS value based on the start_ip converted to a SIPI vector */ 1243 sipi_vector = (start_ip >> 12); 1244 vmsa->cs.base = sipi_vector << 12; 1245 vmsa->cs.limit = AP_INIT_CS_LIMIT; 1246 vmsa->cs.attrib = INIT_CS_ATTRIBS; 1247 vmsa->cs.selector = sipi_vector << 8; 1248 1249 /* Set the RIP value based on start_ip */ 1250 vmsa->rip = start_ip & 0xfff; 1251 1252 /* Set AP INIT defaults as documented in the APM */ 1253 vmsa->ds.limit = AP_INIT_DS_LIMIT; 1254 vmsa->ds.attrib = INIT_DS_ATTRIBS; 1255 vmsa->es = vmsa->ds; 1256 vmsa->fs = vmsa->ds; 1257 vmsa->gs = vmsa->ds; 1258 vmsa->ss = vmsa->ds; 1259 1260 vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT; 1261 vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT; 1262 vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS; 1263 vmsa->idtr.limit = AP_INIT_IDTR_LIMIT; 1264 vmsa->tr.limit = AP_INIT_TR_LIMIT; 1265 vmsa->tr.attrib = INIT_TR_ATTRIBS; 1266 1267 vmsa->cr4 = cr4; 1268 vmsa->cr0 = AP_INIT_CR0_DEFAULT; 1269 vmsa->dr7 = DR7_RESET_VALUE; 1270 vmsa->dr6 = AP_INIT_DR6_DEFAULT; 1271 vmsa->rflags = AP_INIT_RFLAGS_DEFAULT; 1272 vmsa->g_pat = AP_INIT_GPAT_DEFAULT; 1273 vmsa->xcr0 = AP_INIT_XCR0_DEFAULT; 1274 vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT; 1275 vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT; 1276 vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT; 1277 1278 /* SVME must be set. 
*/ 1279 vmsa->efer = EFER_SVME; 1280 1281 /* 1282 * Set the SNP-specific fields for this VMSA: 1283 * VMPL level 1284 * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits) 1285 */ 1286 vmsa->vmpl = snp_vmpl; 1287 vmsa->sev_features = sev_status >> 2; 1288 1289 /* Populate AP's TSC scale/offset to get accurate TSC values. */ 1290 if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) { 1291 vmsa->tsc_scale = snp_tsc_scale; 1292 vmsa->tsc_offset = snp_tsc_offset; 1293 } 1294 1295 /* Switch the page over to a VMSA page now that it is initialized */ 1296 ret = snp_set_vmsa(vmsa, caa, apic_id, true); 1297 if (ret) { 1298 pr_err("set VMSA page failed (%u)\n", ret); 1299 free_page((unsigned long)vmsa); 1300 1301 return -EINVAL; 1302 } 1303 1304 /* Issue VMGEXIT AP Creation NAE event */ 1305 local_irq_save(flags); 1306 1307 ghcb = __sev_get_ghcb(&state); 1308 1309 vc_ghcb_invalidate(ghcb); 1310 ghcb_set_rax(ghcb, vmsa->sev_features); 1311 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); 1312 ghcb_set_sw_exit_info_1(ghcb, 1313 ((u64)apic_id << 32) | 1314 ((u64)snp_vmpl << 16) | 1315 SVM_VMGEXIT_AP_CREATE); 1316 ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); 1317 1318 sev_es_wr_ghcb_msr(__pa(ghcb)); 1319 VMGEXIT(); 1320 1321 if (!ghcb_sw_exit_info_1_is_valid(ghcb) || 1322 lower_32_bits(ghcb->save.sw_exit_info_1)) { 1323 pr_err("SNP AP Creation error\n"); 1324 ret = -EINVAL; 1325 } 1326 1327 __sev_put_ghcb(&state); 1328 1329 local_irq_restore(flags); 1330 1331 /* Perform cleanup if there was an error */ 1332 if (ret) { 1333 snp_cleanup_vmsa(vmsa, apic_id); 1334 vmsa = NULL; 1335 } 1336 1337 /* Free up any previous VMSA page */ 1338 if (cur_vmsa) 1339 snp_cleanup_vmsa(cur_vmsa, apic_id); 1340 1341 /* Record the current VMSA page */ 1342 per_cpu(sev_vmsa, cpu) = vmsa; 1343 1344 return ret; 1345 } 1346 1347 void __init snp_set_wakeup_secondary_cpu(void) 1348 { 1349 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1350 return; 1351 1352 /* 1353 * Always set this override if SNP is enabled. This makes it the 1354 * required method to start APs under SNP. If the hypervisor does 1355 * not support AP creation, then no APs will be started. 1356 */ 1357 apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit); 1358 } 1359 1360 int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh) 1361 { 1362 u16 startup_cs, startup_ip; 1363 phys_addr_t jump_table_pa; 1364 u64 jump_table_addr; 1365 u16 __iomem *jump_table; 1366 1367 jump_table_addr = get_jump_table_addr(); 1368 1369 /* On UP guests there is no jump table so this is not a failure */ 1370 if (!jump_table_addr) 1371 return 0; 1372 1373 /* Check if AP Jump Table is page-aligned */ 1374 if (jump_table_addr & ~PAGE_MASK) 1375 return -EINVAL; 1376 1377 jump_table_pa = jump_table_addr & PAGE_MASK; 1378 1379 startup_cs = (u16)(rmh->trampoline_start >> 4); 1380 startup_ip = (u16)(rmh->sev_es_trampoline_start - 1381 rmh->trampoline_start); 1382 1383 jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE); 1384 if (!jump_table) 1385 return -EIO; 1386 1387 writew(startup_ip, &jump_table[0]); 1388 writew(startup_cs, &jump_table[1]); 1389 1390 iounmap(jump_table); 1391 1392 return 0; 1393 } 1394 1395 /* 1396 * This is needed by the OVMF UEFI firmware which will use whatever it finds in 1397 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu 1398 * runtime GHCBs used by the kernel are also mapped in the EFI page-table. 
1399 */ 1400 int __init sev_es_efi_map_ghcbs(pgd_t *pgd) 1401 { 1402 struct sev_es_runtime_data *data; 1403 unsigned long address, pflags; 1404 int cpu; 1405 u64 pfn; 1406 1407 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) 1408 return 0; 1409 1410 pflags = _PAGE_NX | _PAGE_RW; 1411 1412 for_each_possible_cpu(cpu) { 1413 data = per_cpu(runtime_data, cpu); 1414 1415 address = __pa(&data->ghcb_page); 1416 pfn = address >> PAGE_SHIFT; 1417 1418 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags)) 1419 return 1; 1420 } 1421 1422 return 0; 1423 } 1424 1425 /* Writes to the SVSM CAA MSR are ignored */ 1426 static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write) 1427 { 1428 if (write) 1429 return ES_OK; 1430 1431 regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa)); 1432 regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa)); 1433 1434 return ES_OK; 1435 } 1436 1437 /* 1438 * TSC related accesses should not exit to the hypervisor when a guest is 1439 * executing with Secure TSC enabled, so special handling is required for 1440 * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ. 1441 */ 1442 static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write) 1443 { 1444 u64 tsc; 1445 1446 /* 1447 * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled. 1448 * Terminate the SNP guest when the interception is enabled. 1449 */ 1450 if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ) 1451 return ES_VMM_ERROR; 1452 1453 /* 1454 * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC 1455 * to return undefined values, so ignore all writes. 1456 * 1457 * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use 1458 * the value returned by rdtsc_ordered(). 1459 */ 1460 if (write) { 1461 WARN_ONCE(1, "TSC MSR writes are verboten!\n"); 1462 return ES_OK; 1463 } 1464 1465 tsc = rdtsc_ordered(); 1466 regs->ax = lower_32_bits(tsc); 1467 regs->dx = upper_32_bits(tsc); 1468 1469 return ES_OK; 1470 } 1471 1472 static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) 1473 { 1474 struct pt_regs *regs = ctxt->regs; 1475 enum es_result ret; 1476 bool write; 1477 1478 /* Is it a WRMSR? */ 1479 write = ctxt->insn.opcode.bytes[1] == 0x30; 1480 1481 switch (regs->cx) { 1482 case MSR_SVSM_CAA: 1483 return __vc_handle_msr_caa(regs, write); 1484 case MSR_IA32_TSC: 1485 case MSR_AMD64_GUEST_TSC_FREQ: 1486 if (sev_status & MSR_AMD64_SNP_SECURE_TSC) 1487 return __vc_handle_secure_tsc_msrs(regs, write); 1488 else 1489 break; 1490 default: 1491 break; 1492 } 1493 1494 ghcb_set_rcx(ghcb, regs->cx); 1495 if (write) { 1496 ghcb_set_rax(ghcb, regs->ax); 1497 ghcb_set_rdx(ghcb, regs->dx); 1498 } 1499 1500 ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0); 1501 1502 if ((ret == ES_OK) && !write) { 1503 regs->ax = ghcb->save.rax; 1504 regs->dx = ghcb->save.rdx; 1505 } 1506 1507 return ret; 1508 } 1509 1510 static void snp_register_per_cpu_ghcb(void) 1511 { 1512 struct sev_es_runtime_data *data; 1513 struct ghcb *ghcb; 1514 1515 data = this_cpu_read(runtime_data); 1516 ghcb = &data->ghcb_page; 1517 1518 snp_register_ghcb_early(__pa(ghcb)); 1519 } 1520 1521 void setup_ghcb(void) 1522 { 1523 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) 1524 return; 1525 1526 /* 1527 * Check whether the runtime #VC exception handler is active. It uses 1528 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling(). 
1529 * 1530 * If SNP is active, register the per-CPU GHCB page so that the runtime 1531 * exception handler can use it. 1532 */ 1533 if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) { 1534 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1535 snp_register_per_cpu_ghcb(); 1536 1537 sev_cfg.ghcbs_initialized = true; 1538 1539 return; 1540 } 1541 1542 /* 1543 * Make sure the hypervisor talks a supported protocol. 1544 * This gets called only in the BSP boot phase. 1545 */ 1546 if (!sev_es_negotiate_protocol()) 1547 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 1548 1549 /* 1550 * Clear the boot_ghcb. The first exception comes in before the bss 1551 * section is cleared. 1552 */ 1553 memset(&boot_ghcb_page, 0, PAGE_SIZE); 1554 1555 /* Alright - Make the boot-ghcb public */ 1556 boot_ghcb = &boot_ghcb_page; 1557 1558 /* SNP guest requires that GHCB GPA must be registered. */ 1559 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1560 snp_register_ghcb_early(__pa(&boot_ghcb_page)); 1561 } 1562 1563 #ifdef CONFIG_HOTPLUG_CPU 1564 static void sev_es_ap_hlt_loop(void) 1565 { 1566 struct ghcb_state state; 1567 struct ghcb *ghcb; 1568 1569 ghcb = __sev_get_ghcb(&state); 1570 1571 while (true) { 1572 vc_ghcb_invalidate(ghcb); 1573 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP); 1574 ghcb_set_sw_exit_info_1(ghcb, 0); 1575 ghcb_set_sw_exit_info_2(ghcb, 0); 1576 1577 sev_es_wr_ghcb_msr(__pa(ghcb)); 1578 VMGEXIT(); 1579 1580 /* Wakeup signal? */ 1581 if (ghcb_sw_exit_info_2_is_valid(ghcb) && 1582 ghcb->save.sw_exit_info_2) 1583 break; 1584 } 1585 1586 __sev_put_ghcb(&state); 1587 } 1588 1589 /* 1590 * Play_dead handler when running under SEV-ES. This is needed because 1591 * the hypervisor can't deliver an SIPI request to restart the AP. 1592 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the 1593 * hypervisor wakes it up again. 1594 */ 1595 static void sev_es_play_dead(void) 1596 { 1597 play_dead_common(); 1598 1599 /* IRQs now disabled */ 1600 1601 sev_es_ap_hlt_loop(); 1602 1603 /* 1604 * If we get here, the VCPU was woken up again. Jump to CPU 1605 * startup code to get it back online. 
1606 */ 1607 soft_restart_cpu(); 1608 } 1609 #else /* CONFIG_HOTPLUG_CPU */ 1610 #define sev_es_play_dead native_play_dead 1611 #endif /* CONFIG_HOTPLUG_CPU */ 1612 1613 #ifdef CONFIG_SMP 1614 static void __init sev_es_setup_play_dead(void) 1615 { 1616 smp_ops.play_dead = sev_es_play_dead; 1617 } 1618 #else 1619 static inline void sev_es_setup_play_dead(void) { } 1620 #endif 1621 1622 static void __init alloc_runtime_data(int cpu) 1623 { 1624 struct sev_es_runtime_data *data; 1625 1626 data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu)); 1627 if (!data) 1628 panic("Can't allocate SEV-ES runtime data"); 1629 1630 per_cpu(runtime_data, cpu) = data; 1631 1632 if (snp_vmpl) { 1633 struct svsm_ca *caa; 1634 1635 /* Allocate the SVSM CA page if an SVSM is present */ 1636 caa = memblock_alloc(sizeof(*caa), PAGE_SIZE); 1637 if (!caa) 1638 panic("Can't allocate SVSM CA page\n"); 1639 1640 per_cpu(svsm_caa, cpu) = caa; 1641 per_cpu(svsm_caa_pa, cpu) = __pa(caa); 1642 } 1643 } 1644 1645 static void __init init_ghcb(int cpu) 1646 { 1647 struct sev_es_runtime_data *data; 1648 int err; 1649 1650 data = per_cpu(runtime_data, cpu); 1651 1652 err = early_set_memory_decrypted((unsigned long)&data->ghcb_page, 1653 sizeof(data->ghcb_page)); 1654 if (err) 1655 panic("Can't map GHCBs unencrypted"); 1656 1657 memset(&data->ghcb_page, 0, sizeof(data->ghcb_page)); 1658 1659 data->ghcb_active = false; 1660 data->backup_ghcb_active = false; 1661 } 1662 1663 void __init sev_es_init_vc_handling(void) 1664 { 1665 int cpu; 1666 1667 BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE); 1668 1669 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) 1670 return; 1671 1672 if (!sev_es_check_cpu_features()) 1673 panic("SEV-ES CPU Features missing"); 1674 1675 /* 1676 * SNP is supported in v2 of the GHCB spec which mandates support for HV 1677 * features. 
1678 */ 1679 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) { 1680 sev_hv_features = get_hv_features(); 1681 1682 if (!(sev_hv_features & GHCB_HV_FT_SNP)) 1683 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); 1684 } 1685 1686 /* Initialize per-cpu GHCB pages */ 1687 for_each_possible_cpu(cpu) { 1688 alloc_runtime_data(cpu); 1689 init_ghcb(cpu); 1690 } 1691 1692 /* If running under an SVSM, switch to the per-cpu CA */ 1693 if (snp_vmpl) { 1694 struct svsm_call call = {}; 1695 unsigned long flags; 1696 int ret; 1697 1698 local_irq_save(flags); 1699 1700 /* 1701 * SVSM_CORE_REMAP_CA call: 1702 * RAX = 0 (Protocol=0, CallID=0) 1703 * RCX = New CA GPA 1704 */ 1705 call.caa = svsm_get_caa(); 1706 call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); 1707 call.rcx = this_cpu_read(svsm_caa_pa); 1708 ret = svsm_perform_call_protocol(&call); 1709 if (ret) 1710 panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n", 1711 ret, call.rax_out); 1712 1713 sev_cfg.use_cas = true; 1714 1715 local_irq_restore(flags); 1716 } 1717 1718 sev_es_setup_play_dead(); 1719 1720 /* Secondary CPUs use the runtime #VC handler */ 1721 initial_vc_handler = (unsigned long)kernel_exc_vmm_communication; 1722 } 1723 1724 static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) 1725 { 1726 int trapnr = ctxt->fi.vector; 1727 1728 if (trapnr == X86_TRAP_PF) 1729 native_write_cr2(ctxt->fi.cr2); 1730 1731 ctxt->regs->orig_ax = ctxt->fi.error_code; 1732 do_early_exception(ctxt->regs, trapnr); 1733 } 1734 1735 static long *vc_insn_get_rm(struct es_em_ctxt *ctxt) 1736 { 1737 long *reg_array; 1738 int offset; 1739 1740 reg_array = (long *)ctxt->regs; 1741 offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs); 1742 1743 if (offset < 0) 1744 return NULL; 1745 1746 offset /= sizeof(long); 1747 1748 return reg_array + offset; 1749 } 1750 static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, 1751 unsigned int bytes, bool read) 1752 { 1753 u64 exit_code, exit_info_1, exit_info_2; 1754 unsigned long ghcb_pa = __pa(ghcb); 1755 enum es_result res; 1756 phys_addr_t paddr; 1757 void __user *ref; 1758 1759 ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs); 1760 if (ref == (void __user *)-1L) 1761 return ES_UNSUPPORTED; 1762 1763 exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; 1764 1765 res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); 1766 if (res != ES_OK) { 1767 if (res == ES_EXCEPTION && !read) 1768 ctxt->fi.error_code |= X86_PF_WRITE; 1769 1770 return res; 1771 } 1772 1773 exit_info_1 = paddr; 1774 /* Can never be greater than 8 */ 1775 exit_info_2 = bytes; 1776 1777 ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer)); 1778 1779 return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2); 1780 } 1781 1782 /* 1783 * The MOVS instruction has two memory operands, which raises the 1784 * problem that it is not known whether the access to the source or the 1785 * destination caused the #VC exception (and hence whether an MMIO read 1786 * or write operation needs to be emulated). 1787 * 1788 * Instead of playing games with walking page-tables and trying to guess 1789 * whether the source or destination is an MMIO range, split the move 1790 * into two operations, a read and a write with only one memory operand. 1791 * This will cause a nested #VC exception on the MMIO address which can 1792 * then be handled. 
1793 * 1794 * This implementation has the benefit that it also supports MOVS where 1795 * source _and_ destination are MMIO regions. 1796 * 1797 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a 1798 * rare operation. If it turns out to be a performance problem the split 1799 * operations can be moved to memcpy_fromio() and memcpy_toio(). 1800 */ 1801 static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt, 1802 unsigned int bytes) 1803 { 1804 unsigned long ds_base, es_base; 1805 unsigned char *src, *dst; 1806 unsigned char buffer[8]; 1807 enum es_result ret; 1808 bool rep; 1809 int off; 1810 1811 ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS); 1812 es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); 1813 1814 if (ds_base == -1L || es_base == -1L) { 1815 ctxt->fi.vector = X86_TRAP_GP; 1816 ctxt->fi.error_code = 0; 1817 return ES_EXCEPTION; 1818 } 1819 1820 src = ds_base + (unsigned char *)ctxt->regs->si; 1821 dst = es_base + (unsigned char *)ctxt->regs->di; 1822 1823 ret = vc_read_mem(ctxt, src, buffer, bytes); 1824 if (ret != ES_OK) 1825 return ret; 1826 1827 ret = vc_write_mem(ctxt, dst, buffer, bytes); 1828 if (ret != ES_OK) 1829 return ret; 1830 1831 if (ctxt->regs->flags & X86_EFLAGS_DF) 1832 off = -bytes; 1833 else 1834 off = bytes; 1835 1836 ctxt->regs->si += off; 1837 ctxt->regs->di += off; 1838 1839 rep = insn_has_rep_prefix(&ctxt->insn); 1840 if (rep) 1841 ctxt->regs->cx -= 1; 1842 1843 if (!rep || ctxt->regs->cx == 0) 1844 return ES_OK; 1845 else 1846 return ES_RETRY; 1847 } 1848 1849 static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) 1850 { 1851 struct insn *insn = &ctxt->insn; 1852 enum insn_mmio_type mmio; 1853 unsigned int bytes = 0; 1854 enum es_result ret; 1855 u8 sign_byte; 1856 long *reg_data; 1857 1858 mmio = insn_decode_mmio(insn, &bytes); 1859 if (mmio == INSN_MMIO_DECODE_FAILED) 1860 return ES_DECODE_FAILED; 1861 1862 if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) { 1863 reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs); 1864 if (!reg_data) 1865 return ES_DECODE_FAILED; 1866 } 1867 1868 if (user_mode(ctxt->regs)) 1869 return ES_UNSUPPORTED; 1870 1871 switch (mmio) { 1872 case INSN_MMIO_WRITE: 1873 memcpy(ghcb->shared_buffer, reg_data, bytes); 1874 ret = vc_do_mmio(ghcb, ctxt, bytes, false); 1875 break; 1876 case INSN_MMIO_WRITE_IMM: 1877 memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes); 1878 ret = vc_do_mmio(ghcb, ctxt, bytes, false); 1879 break; 1880 case INSN_MMIO_READ: 1881 ret = vc_do_mmio(ghcb, ctxt, bytes, true); 1882 if (ret) 1883 break; 1884 1885 /* Zero-extend for 32-bit operation */ 1886 if (bytes == 4) 1887 *reg_data = 0; 1888 1889 memcpy(reg_data, ghcb->shared_buffer, bytes); 1890 break; 1891 case INSN_MMIO_READ_ZERO_EXTEND: 1892 ret = vc_do_mmio(ghcb, ctxt, bytes, true); 1893 if (ret) 1894 break; 1895 1896 /* Zero extend based on operand size */ 1897 memset(reg_data, 0, insn->opnd_bytes); 1898 memcpy(reg_data, ghcb->shared_buffer, bytes); 1899 break; 1900 case INSN_MMIO_READ_SIGN_EXTEND: 1901 ret = vc_do_mmio(ghcb, ctxt, bytes, true); 1902 if (ret) 1903 break; 1904 1905 if (bytes == 1) { 1906 u8 *val = (u8 *)ghcb->shared_buffer; 1907 1908 sign_byte = (*val & 0x80) ? 0xff : 0x00; 1909 } else { 1910 u16 *val = (u16 *)ghcb->shared_buffer; 1911 1912 sign_byte = (*val & 0x8000) ? 
0xff : 0x00; 1913 } 1914 1915 /* Sign extend based on operand size */ 1916 memset(reg_data, sign_byte, insn->opnd_bytes); 1917 memcpy(reg_data, ghcb->shared_buffer, bytes); 1918 break; 1919 case INSN_MMIO_MOVS: 1920 ret = vc_handle_mmio_movs(ctxt, bytes); 1921 break; 1922 default: 1923 ret = ES_UNSUPPORTED; 1924 break; 1925 } 1926 1927 return ret; 1928 } 1929 1930 static enum es_result vc_handle_dr7_write(struct ghcb *ghcb, 1931 struct es_em_ctxt *ctxt) 1932 { 1933 struct sev_es_runtime_data *data = this_cpu_read(runtime_data); 1934 long val, *reg = vc_insn_get_rm(ctxt); 1935 enum es_result ret; 1936 1937 if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP) 1938 return ES_VMM_ERROR; 1939 1940 if (!reg) 1941 return ES_DECODE_FAILED; 1942 1943 val = *reg; 1944 1945 /* Upper 32 bits must be written as zeroes */ 1946 if (val >> 32) { 1947 ctxt->fi.vector = X86_TRAP_GP; 1948 ctxt->fi.error_code = 0; 1949 return ES_EXCEPTION; 1950 } 1951 1952 /* Clear out other reserved bits and set bit 10 */ 1953 val = (val & 0xffff23ffL) | BIT(10); 1954 1955 /* Early non-zero writes to DR7 are not supported */ 1956 if (!data && (val & ~DR7_RESET_VALUE)) 1957 return ES_UNSUPPORTED; 1958 1959 /* Using a value of 0 for ExitInfo1 means RAX holds the value */ 1960 ghcb_set_rax(ghcb, val); 1961 ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0); 1962 if (ret != ES_OK) 1963 return ret; 1964 1965 if (data) 1966 data->dr7 = val; 1967 1968 return ES_OK; 1969 } 1970 1971 static enum es_result vc_handle_dr7_read(struct ghcb *ghcb, 1972 struct es_em_ctxt *ctxt) 1973 { 1974 struct sev_es_runtime_data *data = this_cpu_read(runtime_data); 1975 long *reg = vc_insn_get_rm(ctxt); 1976 1977 if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP) 1978 return ES_VMM_ERROR; 1979 1980 if (!reg) 1981 return ES_DECODE_FAILED; 1982 1983 if (data) 1984 *reg = data->dr7; 1985 else 1986 *reg = DR7_RESET_VALUE; 1987 1988 return ES_OK; 1989 } 1990 1991 static enum es_result vc_handle_wbinvd(struct ghcb *ghcb, 1992 struct es_em_ctxt *ctxt) 1993 { 1994 return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0); 1995 } 1996 1997 static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt) 1998 { 1999 enum es_result ret; 2000 2001 ghcb_set_rcx(ghcb, ctxt->regs->cx); 2002 2003 ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0); 2004 if (ret != ES_OK) 2005 return ret; 2006 2007 if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb))) 2008 return ES_VMM_ERROR; 2009 2010 ctxt->regs->ax = ghcb->save.rax; 2011 ctxt->regs->dx = ghcb->save.rdx; 2012 2013 return ES_OK; 2014 } 2015 2016 static enum es_result vc_handle_monitor(struct ghcb *ghcb, 2017 struct es_em_ctxt *ctxt) 2018 { 2019 /* 2020 * Treat it as a NOP and do not leak a physical address to the 2021 * hypervisor. 2022 */ 2023 return ES_OK; 2024 } 2025 2026 static enum es_result vc_handle_mwait(struct ghcb *ghcb, 2027 struct es_em_ctxt *ctxt) 2028 { 2029 /* Treat the same as MONITOR/MONITORX */ 2030 return ES_OK; 2031 } 2032 2033 static enum es_result vc_handle_vmmcall(struct ghcb *ghcb, 2034 struct es_em_ctxt *ctxt) 2035 { 2036 enum es_result ret; 2037 2038 ghcb_set_rax(ghcb, ctxt->regs->ax); 2039 ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 
			   3 : 0);

	if (x86_platform.hyper.sev_es_hcall_prepare)
		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!ghcb_rax_is_valid(ghcb))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;

	/*
	 * Call sev_es_hcall_finish() after regs->ax is already set.
	 * This allows the hypervisor handler to overwrite it again if
	 * necessary.
	 */
	if (x86_platform.hyper.sev_es_hcall_finish &&
	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
		return ES_VMM_ERROR;

	return ES_OK;
}

static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Calling exc_alignment_check() directly does not work, because it
	 * enables IRQs and the GHCB is active. Forward the exception and call
	 * it later from vc_forward_exception().
	 */
	ctxt->fi.vector = X86_TRAP_AC;
	ctxt->fi.error_code = 0;
	return ES_EXCEPTION;
}

static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
					 struct ghcb *ghcb,
					 unsigned long exit_code)
{
	enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);

	if (result != ES_OK)
		return result;

	switch (exit_code) {
	case SVM_EXIT_READ_DR7:
		result = vc_handle_dr7_read(ghcb, ctxt);
		break;
	case SVM_EXIT_WRITE_DR7:
		result = vc_handle_dr7_write(ghcb, ctxt);
		break;
	case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
		result = vc_handle_trap_ac(ghcb, ctxt);
		break;
	case SVM_EXIT_RDTSC:
	case SVM_EXIT_RDTSCP:
		result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
		break;
	case SVM_EXIT_RDPMC:
		result = vc_handle_rdpmc(ghcb, ctxt);
		break;
	case SVM_EXIT_INVD:
		pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
		result = ES_UNSUPPORTED;
		break;
	case SVM_EXIT_CPUID:
		result = vc_handle_cpuid(ghcb, ctxt);
		break;
	case SVM_EXIT_IOIO:
		result = vc_handle_ioio(ghcb, ctxt);
		break;
	case SVM_EXIT_MSR:
		result = vc_handle_msr(ghcb, ctxt);
		break;
	case SVM_EXIT_VMMCALL:
		result = vc_handle_vmmcall(ghcb, ctxt);
		break;
	case SVM_EXIT_WBINVD:
		result = vc_handle_wbinvd(ghcb, ctxt);
		break;
	case SVM_EXIT_MONITOR:
		result = vc_handle_monitor(ghcb, ctxt);
		break;
	case SVM_EXIT_MWAIT:
		result = vc_handle_mwait(ghcb, ctxt);
		break;
	case SVM_EXIT_NPF:
		result = vc_handle_mmio(ghcb, ctxt);
		break;
	default:
		/*
		 * Unexpected #VC exception
		 */
		result = ES_UNSUPPORTED;
	}

	return result;
}

static __always_inline bool is_vc2_stack(unsigned long sp)
{
	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
}

static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
{
	unsigned long sp, prev_sp;

	sp = (unsigned long)regs;
	prev_sp = regs->sp;

	/*
	 * If the code was already executing on the VC2 stack when the #VC
	 * happened, let it proceed to the normal handling routine. This way the
	 * code executing on the VC2 stack can cause #VC exceptions to get handled.
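	 *
	 * The check below therefore only reports an invalid context when the
	 * current handler stack is the VC2 stack while the interrupted context
	 * (prev_sp) was not on it, i.e. when a nested #VC arrived while the
	 * emergency VC2 stack was already in use.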
2158 */ 2159 return is_vc2_stack(sp) && !is_vc2_stack(prev_sp); 2160 } 2161 2162 static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) 2163 { 2164 struct ghcb_state state; 2165 struct es_em_ctxt ctxt; 2166 enum es_result result; 2167 struct ghcb *ghcb; 2168 bool ret = true; 2169 2170 ghcb = __sev_get_ghcb(&state); 2171 2172 vc_ghcb_invalidate(ghcb); 2173 result = vc_init_em_ctxt(&ctxt, regs, error_code); 2174 2175 if (result == ES_OK) 2176 result = vc_handle_exitcode(&ctxt, ghcb, error_code); 2177 2178 __sev_put_ghcb(&state); 2179 2180 /* Done - now check the result */ 2181 switch (result) { 2182 case ES_OK: 2183 vc_finish_insn(&ctxt); 2184 break; 2185 case ES_UNSUPPORTED: 2186 pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n", 2187 error_code, regs->ip); 2188 ret = false; 2189 break; 2190 case ES_VMM_ERROR: 2191 pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", 2192 error_code, regs->ip); 2193 ret = false; 2194 break; 2195 case ES_DECODE_FAILED: 2196 pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", 2197 error_code, regs->ip); 2198 ret = false; 2199 break; 2200 case ES_EXCEPTION: 2201 vc_forward_exception(&ctxt); 2202 break; 2203 case ES_RETRY: 2204 /* Nothing to do */ 2205 break; 2206 default: 2207 pr_emerg("Unknown result in %s():%d\n", __func__, result); 2208 /* 2209 * Emulating the instruction which caused the #VC exception 2210 * failed - can't continue so print debug information 2211 */ 2212 BUG(); 2213 } 2214 2215 return ret; 2216 } 2217 2218 static __always_inline bool vc_is_db(unsigned long error_code) 2219 { 2220 return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB; 2221 } 2222 2223 /* 2224 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode 2225 * and will panic when an error happens. 2226 */ 2227 DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) 2228 { 2229 irqentry_state_t irq_state; 2230 2231 /* 2232 * With the current implementation it is always possible to switch to a 2233 * safe stack because #VC exceptions only happen at known places, like 2234 * intercepted instructions or accesses to MMIO areas/IO ports. They can 2235 * also happen with code instrumentation when the hypervisor intercepts 2236 * #DB, but the critical paths are forbidden to be instrumented, so #DB 2237 * exceptions currently also only happen in safe places. 2238 * 2239 * But keep this here in case the noinstr annotations are violated due 2240 * to bug elsewhere. 2241 */ 2242 if (unlikely(vc_from_invalid_context(regs))) { 2243 instrumentation_begin(); 2244 panic("Can't handle #VC exception from unsupported context\n"); 2245 instrumentation_end(); 2246 } 2247 2248 /* 2249 * Handle #DB before calling into !noinstr code to avoid recursive #DB. 2250 */ 2251 if (vc_is_db(error_code)) { 2252 exc_debug(regs); 2253 return; 2254 } 2255 2256 irq_state = irqentry_nmi_enter(regs); 2257 2258 instrumentation_begin(); 2259 2260 if (!vc_raw_handle_exception(regs, error_code)) { 2261 /* Show some debug info */ 2262 show_regs(regs); 2263 2264 /* Ask hypervisor to sev_es_terminate */ 2265 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 2266 2267 /* If that fails and we get here - just panic */ 2268 panic("Returned from Terminate-Request to Hypervisor\n"); 2269 } 2270 2271 instrumentation_end(); 2272 irqentry_nmi_exit(regs, irq_state); 2273 } 2274 2275 /* 2276 * Runtime #VC exception handler when raised from user mode. 
Runs in IRQ mode 2277 * and will kill the current task with SIGBUS when an error happens. 2278 */ 2279 DEFINE_IDTENTRY_VC_USER(exc_vmm_communication) 2280 { 2281 /* 2282 * Handle #DB before calling into !noinstr code to avoid recursive #DB. 2283 */ 2284 if (vc_is_db(error_code)) { 2285 noist_exc_debug(regs); 2286 return; 2287 } 2288 2289 irqentry_enter_from_user_mode(regs); 2290 instrumentation_begin(); 2291 2292 if (!vc_raw_handle_exception(regs, error_code)) { 2293 /* 2294 * Do not kill the machine if user-space triggered the 2295 * exception. Send SIGBUS instead and let user-space deal with 2296 * it. 2297 */ 2298 force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); 2299 } 2300 2301 instrumentation_end(); 2302 irqentry_exit_to_user_mode(regs); 2303 } 2304 2305 bool __init handle_vc_boot_ghcb(struct pt_regs *regs) 2306 { 2307 unsigned long exit_code = regs->orig_ax; 2308 struct es_em_ctxt ctxt; 2309 enum es_result result; 2310 2311 vc_ghcb_invalidate(boot_ghcb); 2312 2313 result = vc_init_em_ctxt(&ctxt, regs, exit_code); 2314 if (result == ES_OK) 2315 result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code); 2316 2317 /* Done - now check the result */ 2318 switch (result) { 2319 case ES_OK: 2320 vc_finish_insn(&ctxt); 2321 break; 2322 case ES_UNSUPPORTED: 2323 early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", 2324 exit_code, regs->ip); 2325 goto fail; 2326 case ES_VMM_ERROR: 2327 early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", 2328 exit_code, regs->ip); 2329 goto fail; 2330 case ES_DECODE_FAILED: 2331 early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", 2332 exit_code, regs->ip); 2333 goto fail; 2334 case ES_EXCEPTION: 2335 vc_early_forward_exception(&ctxt); 2336 break; 2337 case ES_RETRY: 2338 /* Nothing to do */ 2339 break; 2340 default: 2341 BUG(); 2342 } 2343 2344 return true; 2345 2346 fail: 2347 show_regs(regs); 2348 2349 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 2350 } 2351 2352 /* 2353 * Initial set up of SNP relies on information provided by the 2354 * Confidential Computing blob, which can be passed to the kernel 2355 * in the following ways, depending on how it is booted: 2356 * 2357 * - when booted via the boot/decompress kernel: 2358 * - via boot_params 2359 * 2360 * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH): 2361 * - via a setup_data entry, as defined by the Linux Boot Protocol 2362 * 2363 * Scan for the blob in that order. 2364 */ 2365 static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) 2366 { 2367 struct cc_blob_sev_info *cc_info; 2368 2369 /* Boot kernel would have passed the CC blob via boot_params. */ 2370 if (bp->cc_blob_address) { 2371 cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address; 2372 goto found_cc_info; 2373 } 2374 2375 /* 2376 * If kernel was booted directly, without the use of the 2377 * boot/decompression kernel, the CC blob may have been passed via 2378 * setup_data instead. 2379 */ 2380 cc_info = find_cc_blob_setup_data(bp); 2381 if (!cc_info) 2382 return NULL; 2383 2384 found_cc_info: 2385 if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) 2386 snp_abort(); 2387 2388 return cc_info; 2389 } 2390 2391 static __head void svsm_setup(struct cc_blob_sev_info *cc_info) 2392 { 2393 struct svsm_call call = {}; 2394 int ret; 2395 u64 pa; 2396 2397 /* 2398 * Record the SVSM Calling Area address (CAA) if the guest is not 2399 * running at VMPL0. 
The CA will be used to communicate with the
	 * SVSM to perform the SVSM services.
	 */
	if (!svsm_setup_ca(cc_info))
		return;

	/*
	 * It is very early in the boot and the kernel is running identity
	 * mapped but without having adjusted the pagetables to where the
	 * kernel was loaded (physbase), so get the CA address using
	 * RIP-relative addressing.
	 */
	pa = (u64)&RIP_REL_REF(boot_svsm_ca_page);

	/*
	 * Switch over to the boot SVSM CA while the current CA is still
	 * addressable. There is no GHCB at this point so use the MSR protocol.
	 *
	 * SVSM_CORE_REMAP_CA call:
	 *   RAX = 0 (Protocol=0, CallID=0)
	 *   RCX = New CA GPA
	 */
	call.caa = svsm_get_caa();
	call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
	call.rcx = pa;
	ret = svsm_perform_call_protocol(&call);
	if (ret)
		panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n", ret, call.rax_out);

	RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa;
	RIP_REL_REF(boot_svsm_caa_pa) = pa;
}

bool __head snp_init(struct boot_params *bp)
{
	struct cc_blob_sev_info *cc_info;

	if (!bp)
		return false;

	cc_info = find_cc_blob(bp);
	if (!cc_info)
		return false;

	if (cc_info->secrets_phys && cc_info->secrets_len == PAGE_SIZE)
		secrets_pa = cc_info->secrets_phys;
	else
		return false;

	setup_cpuid_table(cc_info);

	svsm_setup(cc_info);

	/*
	 * The CC blob will be used later to access the secrets page. Cache
	 * it here like the boot kernel does.
	 */
	bp->cc_blob_address = (u32)(unsigned long)cc_info;

	return true;
}

void __head __noreturn snp_abort(void)
{
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}

/*
 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
 * enabled, as the alternative (fallback) logic for DMI probing in the legacy
 * ROM region can cause a crash since this region is not pre-validated.
 */
void __init snp_dmi_setup(void)
{
	if (efi_enabled(EFI_CONFIG_TABLES))
		dmi_setup();
}

static void dump_cpuid_table(void)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	int i = 0;

	pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
		cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);

	for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
			i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
			fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
	}
}

/*
 * It is useful from an auditing/testing perspective to provide an easy way
 * for the guest owner to know that the CPUID table has been initialized as
 * expected, but that initialization happens too early in boot to print any
 * sort of indicator, and there's not really any other good place to do it,
 * so do it here.
 *
 * If running as an SNP guest, report the current VM privilege level (VMPL).
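 *
 * With sev_cfg.debug enabled this also dumps the full SNP CPUID table via
 * dump_cpuid_table(), which gives the guest owner an easy way to audit what
 * the firmware-provided table actually contains.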
2502 */ 2503 static int __init report_snp_info(void) 2504 { 2505 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); 2506 2507 if (cpuid_table->count) { 2508 pr_info("Using SNP CPUID table, %d entries present.\n", 2509 cpuid_table->count); 2510 2511 if (sev_cfg.debug) 2512 dump_cpuid_table(); 2513 } 2514 2515 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 2516 pr_info("SNP running at VMPL%u.\n", snp_vmpl); 2517 2518 return 0; 2519 } 2520 arch_initcall(report_snp_info); 2521 2522 static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input) 2523 { 2524 /* If (new) lengths have been returned, propagate them up */ 2525 if (call->rcx_out != call->rcx) 2526 input->manifest_buf.len = call->rcx_out; 2527 2528 if (call->rdx_out != call->rdx) 2529 input->certificates_buf.len = call->rdx_out; 2530 2531 if (call->r8_out != call->r8) 2532 input->report_buf.len = call->r8_out; 2533 } 2534 2535 int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, 2536 struct svsm_attest_call *input) 2537 { 2538 struct svsm_attest_call *ac; 2539 unsigned long flags; 2540 u64 attest_call_pa; 2541 int ret; 2542 2543 if (!snp_vmpl) 2544 return -EINVAL; 2545 2546 local_irq_save(flags); 2547 2548 call->caa = svsm_get_caa(); 2549 2550 ac = (struct svsm_attest_call *)call->caa->svsm_buffer; 2551 attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); 2552 2553 *ac = *input; 2554 2555 /* 2556 * Set input registers for the request and set RDX and R8 to known 2557 * values in order to detect length values being returned in them. 2558 */ 2559 call->rax = call_id; 2560 call->rcx = attest_call_pa; 2561 call->rdx = -1; 2562 call->r8 = -1; 2563 ret = svsm_perform_call_protocol(call); 2564 update_attest_input(call, input); 2565 2566 local_irq_restore(flags); 2567 2568 return ret; 2569 } 2570 EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req); 2571 2572 static int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input, 2573 struct snp_guest_request_ioctl *rio) 2574 { 2575 struct ghcb_state state; 2576 struct es_em_ctxt ctxt; 2577 unsigned long flags; 2578 struct ghcb *ghcb; 2579 int ret; 2580 2581 rio->exitinfo2 = SEV_RET_NO_FW_CALL; 2582 2583 /* 2584 * __sev_get_ghcb() needs to run with IRQs disabled because it is using 2585 * a per-CPU GHCB. 
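	 *
	 * As a consequence the whole GHCB transaction below, up to and
	 * including __sev_put_ghcb(), runs with interrupts disabled so that
	 * normal interrupt handlers on this CPU cannot come in and reuse the
	 * same per-CPU GHCB while the request is in flight.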
2586 */ 2587 local_irq_save(flags); 2588 2589 ghcb = __sev_get_ghcb(&state); 2590 if (!ghcb) { 2591 ret = -EIO; 2592 goto e_restore_irq; 2593 } 2594 2595 vc_ghcb_invalidate(ghcb); 2596 2597 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { 2598 ghcb_set_rax(ghcb, input->data_gpa); 2599 ghcb_set_rbx(ghcb, input->data_npages); 2600 } 2601 2602 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa); 2603 if (ret) 2604 goto e_put; 2605 2606 rio->exitinfo2 = ghcb->save.sw_exit_info_2; 2607 switch (rio->exitinfo2) { 2608 case 0: 2609 break; 2610 2611 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY): 2612 ret = -EAGAIN; 2613 break; 2614 2615 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN): 2616 /* Number of expected pages are returned in RBX */ 2617 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { 2618 input->data_npages = ghcb_get_rbx(ghcb); 2619 ret = -ENOSPC; 2620 break; 2621 } 2622 fallthrough; 2623 default: 2624 ret = -EIO; 2625 break; 2626 } 2627 2628 e_put: 2629 __sev_put_ghcb(&state); 2630 e_restore_irq: 2631 local_irq_restore(flags); 2632 2633 return ret; 2634 } 2635 2636 static struct platform_device sev_guest_device = { 2637 .name = "sev-guest", 2638 .id = -1, 2639 }; 2640 2641 static int __init snp_init_platform_device(void) 2642 { 2643 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 2644 return -ENODEV; 2645 2646 if (platform_device_register(&sev_guest_device)) 2647 return -ENODEV; 2648 2649 pr_info("SNP guest platform device initialized.\n"); 2650 return 0; 2651 } 2652 device_initcall(snp_init_platform_device); 2653 2654 void sev_show_status(void) 2655 { 2656 int i; 2657 2658 pr_info("Status: "); 2659 for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) { 2660 if (sev_status & BIT_ULL(i)) { 2661 if (!sev_status_feat_names[i]) 2662 continue; 2663 2664 pr_cont("%s ", sev_status_feat_names[i]); 2665 } 2666 } 2667 pr_cont("\n"); 2668 } 2669 2670 void __init snp_update_svsm_ca(void) 2671 { 2672 if (!snp_vmpl) 2673 return; 2674 2675 /* Update the CAA to a proper kernel address */ 2676 boot_svsm_caa = &boot_svsm_ca_page; 2677 } 2678 2679 #ifdef CONFIG_SYSFS 2680 static ssize_t vmpl_show(struct kobject *kobj, 2681 struct kobj_attribute *attr, char *buf) 2682 { 2683 return sysfs_emit(buf, "%d\n", snp_vmpl); 2684 } 2685 2686 static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl); 2687 2688 static struct attribute *vmpl_attrs[] = { 2689 &vmpl_attr.attr, 2690 NULL 2691 }; 2692 2693 static struct attribute_group sev_attr_group = { 2694 .attrs = vmpl_attrs, 2695 }; 2696 2697 static int __init sev_sysfs_init(void) 2698 { 2699 struct kobject *sev_kobj; 2700 struct device *dev_root; 2701 int ret; 2702 2703 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 2704 return -ENODEV; 2705 2706 dev_root = bus_get_dev_root(&cpu_subsys); 2707 if (!dev_root) 2708 return -ENODEV; 2709 2710 sev_kobj = kobject_create_and_add("sev", &dev_root->kobj); 2711 put_device(dev_root); 2712 2713 if (!sev_kobj) 2714 return -ENOMEM; 2715 2716 ret = sysfs_create_group(sev_kobj, &sev_attr_group); 2717 if (ret) 2718 kobject_put(sev_kobj); 2719 2720 return ret; 2721 } 2722 arch_initcall(sev_sysfs_init); 2723 #endif // CONFIG_SYSFS 2724 2725 static void free_shared_pages(void *buf, size_t sz) 2726 { 2727 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; 2728 int ret; 2729 2730 if (!buf) 2731 return; 2732 2733 ret = set_memory_encrypted((unsigned long)buf, npages); 2734 if (ret) { 2735 WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); 2736 return; 2737 } 2738 2739 
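	/*
	 * The memory is private (encrypted) again at this point, so it is
	 * safe to hand the pages back to the page allocator.
	 */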
__free_pages(virt_to_page(buf), get_order(sz)); 2740 } 2741 2742 static void *alloc_shared_pages(size_t sz) 2743 { 2744 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; 2745 struct page *page; 2746 int ret; 2747 2748 page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz)); 2749 if (!page) 2750 return NULL; 2751 2752 ret = set_memory_decrypted((unsigned long)page_address(page), npages); 2753 if (ret) { 2754 pr_err("failed to mark page shared, ret=%d\n", ret); 2755 __free_pages(page, get_order(sz)); 2756 return NULL; 2757 } 2758 2759 return page_address(page); 2760 } 2761 2762 static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno) 2763 { 2764 u8 *key = NULL; 2765 2766 switch (id) { 2767 case 0: 2768 *seqno = &secrets->os_area.msg_seqno_0; 2769 key = secrets->vmpck0; 2770 break; 2771 case 1: 2772 *seqno = &secrets->os_area.msg_seqno_1; 2773 key = secrets->vmpck1; 2774 break; 2775 case 2: 2776 *seqno = &secrets->os_area.msg_seqno_2; 2777 key = secrets->vmpck2; 2778 break; 2779 case 3: 2780 *seqno = &secrets->os_area.msg_seqno_3; 2781 key = secrets->vmpck3; 2782 break; 2783 default: 2784 break; 2785 } 2786 2787 return key; 2788 } 2789 2790 static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen) 2791 { 2792 struct aesgcm_ctx *ctx; 2793 2794 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 2795 if (!ctx) 2796 return NULL; 2797 2798 if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) { 2799 pr_err("Crypto context initialization failed\n"); 2800 kfree(ctx); 2801 return NULL; 2802 } 2803 2804 return ctx; 2805 } 2806 2807 int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) 2808 { 2809 /* Adjust the default VMPCK key based on the executing VMPL level */ 2810 if (vmpck_id == -1) 2811 vmpck_id = snp_vmpl; 2812 2813 mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno); 2814 if (!mdesc->vmpck) { 2815 pr_err("Invalid VMPCK%d communication key\n", vmpck_id); 2816 return -EINVAL; 2817 } 2818 2819 /* Verify that VMPCK is not zero. */ 2820 if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) { 2821 pr_err("Empty VMPCK%d communication key\n", vmpck_id); 2822 return -EINVAL; 2823 } 2824 2825 mdesc->vmpck_id = vmpck_id; 2826 2827 mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN); 2828 if (!mdesc->ctx) 2829 return -ENOMEM; 2830 2831 return 0; 2832 } 2833 EXPORT_SYMBOL_GPL(snp_msg_init); 2834 2835 struct snp_msg_desc *snp_msg_alloc(void) 2836 { 2837 struct snp_msg_desc *mdesc; 2838 void __iomem *mem; 2839 2840 BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE); 2841 2842 mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL); 2843 if (!mdesc) 2844 return ERR_PTR(-ENOMEM); 2845 2846 mem = ioremap_encrypted(secrets_pa, PAGE_SIZE); 2847 if (!mem) 2848 goto e_free_mdesc; 2849 2850 mdesc->secrets = (__force struct snp_secrets_page *)mem; 2851 2852 /* Allocate the shared page used for the request and response message. 
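	 * These pages are shared (decrypted) so the hypervisor can read the
	 * request and write the response; only encrypted message blobs are
	 * ever placed in them, the plaintext stays in mdesc->secret_request
	 * and mdesc->secret_response.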
	 */
	mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg));
	if (!mdesc->request)
		goto e_unmap;

	mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg));
	if (!mdesc->response)
		goto e_free_request;

	mdesc->certs_data = alloc_shared_pages(SEV_FW_BLOB_MAX_SIZE);
	if (!mdesc->certs_data)
		goto e_free_response;

	/* Initialize the input addresses for the guest request */
	mdesc->input.req_gpa = __pa(mdesc->request);
	mdesc->input.resp_gpa = __pa(mdesc->response);
	mdesc->input.data_gpa = __pa(mdesc->certs_data);

	return mdesc;

e_free_response:
	free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
e_free_request:
	free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
e_unmap:
	iounmap(mem);
e_free_mdesc:
	kfree(mdesc);

	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(snp_msg_alloc);

void snp_msg_free(struct snp_msg_desc *mdesc)
{
	if (!mdesc)
		return;

	kfree(mdesc->ctx);
	free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
	free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
	free_shared_pages(mdesc->certs_data, SEV_FW_BLOB_MAX_SIZE);
	iounmap((__force void __iomem *)mdesc->secrets);

	memset(mdesc, 0, sizeof(*mdesc));
	kfree(mdesc);
}
EXPORT_SYMBOL_GPL(snp_msg_free);

/* Mutex to serialize the shared buffer access and command handling. */
static DEFINE_MUTEX(snp_cmd_mutex);

/*
 * If an error is received from the host or AMD Secure Processor (ASP) there
 * are two options. Either retry the exact same encrypted request or discontinue
 * using the VMPCK.
 *
 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to
 * encrypt the requests. The IV for this scheme is the sequence number. GCM
 * cannot tolerate IV reuse.
 *
 * The ASP FW v1.51 only increments the sequence numbers on a successful
 * guest<->ASP back and forth and only accepts messages at its exact sequence
 * number.
 *
 * So if the sequence number were to be reused the encryption scheme is
 * vulnerable. If the sequence number were incremented for a fresh IV the ASP
 * will reject the request.
 */
static void snp_disable_vmpck(struct snp_msg_desc *mdesc)
{
	pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n",
		 mdesc->vmpck_id);
	memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN);
	mdesc->vmpck = NULL;
}

static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
	u64 count;

	lockdep_assert_held(&snp_cmd_mutex);

	/* Read the current message sequence counter from the secrets page */
	count = *mdesc->os_area_msg_seqno;

	return count + 1;
}

/* Return a non-zero sequence number on success */
static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
	u64 count = __snp_get_msg_seqno(mdesc);

	/*
	 * The message sequence counter for the SNP guest request is a 64-bit
	 * value but version 2 of the GHCB specification defines a 32-bit storage
	 * for it. If the counter exceeds the 32-bit range then return zero.
	 * The caller should check the return value, but if the caller happens to
	 * not check the value and use it, then the firmware treats zero as an
	 * invalid number and will fail the message request.
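	 *
	 * As an illustration of the counter handling: if the secrets page
	 * currently holds 4, this request goes out with sequence number 5,
	 * the PSP answers with 6, and snp_inc_msg_seqno() then advances the
	 * stored value to 6 so the next request uses 7.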
2953 */ 2954 if (count >= UINT_MAX) { 2955 pr_err("request message sequence counter overflow\n"); 2956 return 0; 2957 } 2958 2959 return count; 2960 } 2961 2962 static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc) 2963 { 2964 /* 2965 * The counter is also incremented by the PSP, so increment it by 2 2966 * and save in secrets page. 2967 */ 2968 *mdesc->os_area_msg_seqno += 2; 2969 } 2970 2971 static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req) 2972 { 2973 struct snp_guest_msg *resp_msg = &mdesc->secret_response; 2974 struct snp_guest_msg *req_msg = &mdesc->secret_request; 2975 struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr; 2976 struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr; 2977 struct aesgcm_ctx *ctx = mdesc->ctx; 2978 u8 iv[GCM_AES_IV_SIZE] = {}; 2979 2980 pr_debug("response [seqno %lld type %d version %d sz %d]\n", 2981 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version, 2982 resp_msg_hdr->msg_sz); 2983 2984 /* Copy response from shared memory to encrypted memory. */ 2985 memcpy(resp_msg, mdesc->response, sizeof(*resp_msg)); 2986 2987 /* Verify that the sequence counter is incremented by 1 */ 2988 if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1))) 2989 return -EBADMSG; 2990 2991 /* Verify response message type and version number. */ 2992 if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) || 2993 resp_msg_hdr->msg_version != req_msg_hdr->msg_version) 2994 return -EBADMSG; 2995 2996 /* 2997 * If the message size is greater than our buffer length then return 2998 * an error. 2999 */ 3000 if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz)) 3001 return -EBADMSG; 3002 3003 /* Decrypt the payload */ 3004 memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno))); 3005 if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz, 3006 &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag)) 3007 return -EBADMSG; 3008 3009 return 0; 3010 } 3011 3012 static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req) 3013 { 3014 struct snp_guest_msg *msg = &mdesc->secret_request; 3015 struct snp_guest_msg_hdr *hdr = &msg->hdr; 3016 struct aesgcm_ctx *ctx = mdesc->ctx; 3017 u8 iv[GCM_AES_IV_SIZE] = {}; 3018 3019 memset(msg, 0, sizeof(*msg)); 3020 3021 hdr->algo = SNP_AEAD_AES_256_GCM; 3022 hdr->hdr_version = MSG_HDR_VER; 3023 hdr->hdr_sz = sizeof(*hdr); 3024 hdr->msg_type = req->msg_type; 3025 hdr->msg_version = req->msg_version; 3026 hdr->msg_seqno = seqno; 3027 hdr->msg_vmpck = req->vmpck_id; 3028 hdr->msg_sz = req->req_sz; 3029 3030 /* Verify the sequence number is non-zero */ 3031 if (!hdr->msg_seqno) 3032 return -ENOSR; 3033 3034 pr_debug("request [seqno %lld type %d version %d sz %d]\n", 3035 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz); 3036 3037 if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload))) 3038 return -EBADMSG; 3039 3040 memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno))); 3041 aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo, 3042 AAD_LEN, iv, hdr->authtag); 3043 3044 return 0; 3045 } 3046 3047 static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, 3048 struct snp_guest_request_ioctl *rio) 3049 { 3050 unsigned long req_start = jiffies; 3051 unsigned int override_npages = 0; 3052 u64 override_err = 0; 3053 int rc; 3054 3055 retry_request: 3056 /* 3057 * Call firmware to process the 
request. In this function the encrypted
	 * message enters shared memory with the host. So after this call the
	 * sequence number must be incremented or the VMPCK must be deleted to
	 * prevent reuse of the IV.
	 */
	rc = snp_issue_guest_request(req, &mdesc->input, rio);
	switch (rc) {
	case -ENOSPC:
		/*
		 * If the extended guest request fails due to having too
		 * small a certificate data buffer, retry the same
		 * guest request without the extended data request in
		 * order to increment the sequence number and thus avoid
		 * IV reuse.
		 */
		override_npages = mdesc->input.data_npages;
		req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;

		/*
		 * Override the error to inform callers the given extended
		 * request buffer size was too small and give the caller the
		 * required buffer size.
		 */
		override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN);

		/*
		 * If this call to the firmware succeeds, the sequence number can
		 * be incremented allowing for continued use of the VMPCK. If
		 * there is an error reflected in the return value, this value
		 * is checked further down and the result will be the deletion
		 * of the VMPCK and the error code being propagated back to the
		 * user as an ioctl() return code.
		 */
		goto retry_request;

	/*
	 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been
	 * throttled. Retry in the driver to avoid returning and reusing the
	 * message sequence number on a different message.
	 */
	case -EAGAIN:
		if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
			rc = -ETIMEDOUT;
			break;
		}
		schedule_timeout_killable(SNP_REQ_RETRY_DELAY);
		goto retry_request;
	}

	/*
	 * Increment the message sequence number. There is no harm in doing
	 * this now because decryption uses the value stored in the response
	 * structure and any failure will wipe the VMPCK, preventing further
	 * use anyway.
	 */
	snp_inc_msg_seqno(mdesc);

	if (override_err) {
		rio->exitinfo2 = override_err;

		/*
		 * If an extended guest request was issued and the supplied certificate
		 * buffer was not large enough, a standard guest request was issued to
		 * prevent IV reuse. If the standard request was successful, return -EIO
		 * back to the caller as would have originally been returned.
		 */
		if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			rc = -EIO;
	}

	if (override_npages)
		mdesc->input.data_npages = override_npages;

	return rc;
}

int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
			   struct snp_guest_request_ioctl *rio)
{
	u64 seqno;
	int rc;

	guard(mutex)(&snp_cmd_mutex);

	/* Check that the VMPCK is not empty */
	if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
		pr_err_ratelimited("VMPCK is disabled\n");
		return -ENOTTY;
	}

	/* Get the message sequence number and verify that it is non-zero */
	seqno = snp_get_msg_seqno(mdesc);
	if (!seqno)
		return -EIO;

	/* Clear shared memory's response for the host to populate. */
	memset(mdesc->response, 0, sizeof(struct snp_guest_msg));

	/*
	 * Encrypt the userspace-provided payload into mdesc->secret_request.
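	 * The plaintext is encrypted into this private (encrypted) copy first;
	 * only the resulting ciphertext is copied to the shared request page
	 * below, so the request contents are never exposed to the hypervisor
	 * in the clear.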
*/ 3156 rc = enc_payload(mdesc, seqno, req); 3157 if (rc) 3158 return rc; 3159 3160 /* 3161 * Write the fully encrypted request to the shared unencrypted 3162 * request page. 3163 */ 3164 memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request)); 3165 3166 rc = __handle_guest_request(mdesc, req, rio); 3167 if (rc) { 3168 if (rc == -EIO && 3169 rio->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) 3170 return rc; 3171 3172 pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n", 3173 rc, rio->exitinfo2); 3174 3175 snp_disable_vmpck(mdesc); 3176 return rc; 3177 } 3178 3179 rc = verify_and_dec_payload(mdesc, req); 3180 if (rc) { 3181 pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc); 3182 snp_disable_vmpck(mdesc); 3183 return rc; 3184 } 3185 3186 return 0; 3187 } 3188 EXPORT_SYMBOL_GPL(snp_send_guest_request); 3189 3190 static int __init snp_get_tsc_info(void) 3191 { 3192 struct snp_guest_request_ioctl *rio; 3193 struct snp_tsc_info_resp *tsc_resp; 3194 struct snp_tsc_info_req *tsc_req; 3195 struct snp_msg_desc *mdesc; 3196 struct snp_guest_req *req; 3197 int rc = -ENOMEM; 3198 3199 tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL); 3200 if (!tsc_req) 3201 return rc; 3202 3203 /* 3204 * The intermediate response buffer is used while decrypting the 3205 * response payload. Make sure that it has enough space to cover 3206 * the authtag. 3207 */ 3208 tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL); 3209 if (!tsc_resp) 3210 goto e_free_tsc_req; 3211 3212 req = kzalloc(sizeof(*req), GFP_KERNEL); 3213 if (!req) 3214 goto e_free_tsc_resp; 3215 3216 rio = kzalloc(sizeof(*rio), GFP_KERNEL); 3217 if (!rio) 3218 goto e_free_req; 3219 3220 mdesc = snp_msg_alloc(); 3221 if (IS_ERR_OR_NULL(mdesc)) 3222 goto e_free_rio; 3223 3224 rc = snp_msg_init(mdesc, snp_vmpl); 3225 if (rc) 3226 goto e_free_mdesc; 3227 3228 req->msg_version = MSG_HDR_VER; 3229 req->msg_type = SNP_MSG_TSC_INFO_REQ; 3230 req->vmpck_id = snp_vmpl; 3231 req->req_buf = tsc_req; 3232 req->req_sz = sizeof(*tsc_req); 3233 req->resp_buf = (void *)tsc_resp; 3234 req->resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN; 3235 req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; 3236 3237 rc = snp_send_guest_request(mdesc, req, rio); 3238 if (rc) 3239 goto e_request; 3240 3241 pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n", 3242 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset, 3243 tsc_resp->tsc_factor); 3244 3245 if (!tsc_resp->status) { 3246 snp_tsc_scale = tsc_resp->tsc_scale; 3247 snp_tsc_offset = tsc_resp->tsc_offset; 3248 } else { 3249 pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status); 3250 rc = -EIO; 3251 } 3252 3253 e_request: 3254 /* The response buffer contains sensitive data, explicitly clear it. 
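	 * Use memzero_explicit() rather than memset() so the compiler cannot
	 * optimize the clearing away before the buffer is freed.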
	 */
	memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN);
e_free_mdesc:
	snp_msg_free(mdesc);
e_free_rio:
	kfree(rio);
e_free_req:
	kfree(req);
e_free_tsc_resp:
	kfree(tsc_resp);
e_free_tsc_req:
	kfree(tsc_req);

	return rc;
}

void __init snp_secure_tsc_prepare(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	if (snp_get_tsc_info()) {
		pr_alert("Unable to retrieve Secure TSC info from ASP\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
	}

	pr_debug("SecureTSC enabled\n");
}

static unsigned long securetsc_get_tsc_khz(void)
{
	return snp_tsc_freq_khz;
}

void __init snp_secure_tsc_init(void)
{
	unsigned long long tsc_freq_mhz;

	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
	snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000);

	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
}
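
/*
 * Illustrative sketch only, not used anywhere in this file: roughly how a
 * caller such as the sev-guest driver might combine snp_msg_alloc(),
 * snp_msg_init() and snp_send_guest_request() to fetch an attestation report,
 * following the same pattern as snp_get_tsc_info() above. It assumes the
 * SNP_MSG_REPORT_REQ message type and struct snp_report_req/snp_report_resp
 * from <uapi/linux/sev-guest.h>; error handling is intentionally minimal and
 * the VMPL in the request is simply the current VMPL for illustration.
 */
static int __maybe_unused snp_example_get_report(struct snp_report_resp *resp_out)
{
	struct snp_guest_request_ioctl rio = {};
	struct snp_report_req report_req = {};
	struct snp_report_resp *report_resp;
	struct snp_guest_req req = {};
	struct snp_msg_desc *mdesc;
	int rc;

	mdesc = snp_msg_alloc();
	if (IS_ERR_OR_NULL(mdesc))
		return -ENOMEM;

	rc = snp_msg_init(mdesc, snp_vmpl);
	if (rc)
		goto e_free_mdesc;

	/* Leave room for the AES-GCM authtag appended to the response. */
	report_resp = kzalloc(sizeof(*report_resp) + AUTHTAG_LEN, GFP_KERNEL);
	if (!report_resp) {
		rc = -ENOMEM;
		goto e_free_mdesc;
	}

	/* Hypothetical choice: request the report at the current VMPL. */
	report_req.vmpl = snp_vmpl;

	req.msg_version = MSG_HDR_VER;
	req.msg_type = SNP_MSG_REPORT_REQ;
	req.vmpck_id = snp_vmpl;
	req.req_buf = &report_req;
	req.req_sz = sizeof(report_req);
	req.resp_buf = report_resp;
	req.resp_sz = sizeof(*report_resp) + AUTHTAG_LEN;
	req.exit_code = SVM_VMGEXIT_GUEST_REQUEST;

	rc = snp_send_guest_request(mdesc, &req, &rio);
	if (!rc)
		*resp_out = *report_resp;

	/* The decrypted report is sensitive, clear the scratch copy. */
	memzero_explicit(report_resp, sizeof(*report_resp) + AUTHTAG_LEN);
	kfree(report_resp);
e_free_mdesc:
	snp_msg_free(mdesc);

	return rc;
}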