// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2019 SUSE
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"SEV: " fmt

#include <linux/sched/debug.h>	/* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/cc_platform.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/cpumask.h>
#include <linux/efi.h>
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/psp-sev.h>
#include <linux/dmi.h>
#include <uapi/linux/sev-guest.h>
#include <crypto/gcm.h>

#include <asm/init.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
#include <asm/insn-eval.h>
#include <asm/fpu/xcr.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/setup.h>
#include <asm/traps.h>
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/cpuid.h>
#include <asm/cmdline.h>

#define DR7_RESET_VALUE		0x400

/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
#define AP_INIT_CS_LIMIT	0xffff
#define AP_INIT_DS_LIMIT	0xffff
#define AP_INIT_LDTR_LIMIT	0xffff
#define AP_INIT_GDTR_LIMIT	0xffff
#define AP_INIT_IDTR_LIMIT	0xffff
#define AP_INIT_TR_LIMIT	0xffff
#define AP_INIT_RFLAGS_DEFAULT	0x2
#define AP_INIT_DR6_DEFAULT	0xffff0ff0
#define AP_INIT_GPAT_DEFAULT	0x0007040600070406ULL
#define AP_INIT_XCR0_DEFAULT	0x1
#define AP_INIT_X87_FTW_DEFAULT	0x5555
#define AP_INIT_X87_FCW_DEFAULT	0x0040
#define AP_INIT_CR0_DEFAULT	0x60000010
#define AP_INIT_MXCSR_DEFAULT	0x1f80

static const char * const sev_status_feat_names[] = {
	[MSR_AMD64_SEV_ENABLED_BIT]		= "SEV",
	[MSR_AMD64_SEV_ES_ENABLED_BIT]		= "SEV-ES",
	[MSR_AMD64_SEV_SNP_ENABLED_BIT]		= "SEV-SNP",
	[MSR_AMD64_SNP_VTOM_BIT]		= "vTom",
	[MSR_AMD64_SNP_REFLECT_VC_BIT]		= "ReflectVC",
	[MSR_AMD64_SNP_RESTRICTED_INJ_BIT]	= "RI",
	[MSR_AMD64_SNP_ALT_INJ_BIT]		= "AI",
	[MSR_AMD64_SNP_DEBUG_SWAP_BIT]		= "DebugSwap",
	[MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT]	= "NoHostIBS",
	[MSR_AMD64_SNP_BTB_ISOLATION_BIT]	= "BTBIsol",
	[MSR_AMD64_SNP_VMPL_SSS_BIT]		= "VmplSSS",
	[MSR_AMD64_SNP_SECURE_TSC_BIT]		= "SecureTSC",
	[MSR_AMD64_SNP_VMGEXIT_PARAM_BIT]	= "VMGExitParam",
	[MSR_AMD64_SNP_IBS_VIRT_BIT]		= "IBSVirt",
	[MSR_AMD64_SNP_VMSA_REG_PROT_BIT]	= "VMSARegProt",
	[MSR_AMD64_SNP_SMT_PROT_BIT]		= "SMTProt",
};

/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);

/*
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared
 */
static struct ghcb *boot_ghcb __section(".data");

/* Bitmap of SEV features supported by the hypervisor */
static u64 sev_hv_features __ro_after_init;

/* Secrets page physical address from the CC blob */
static u64 secrets_pa __ro_after_init;

/*
 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
 * across the APs VMSA fields (TSC_SCALE and TSC_OFFSET).
 */
static u64 snp_tsc_scale __ro_after_init;
static u64 snp_tsc_offset __ro_after_init;
static u64 snp_tsc_freq_khz __ro_after_init;

/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
	struct ghcb ghcb_page;

	/*
	 * Reserve one page per CPU as backup storage for the unencrypted GHCB.
	 * It is needed when an NMI happens while the #VC handler uses the real
	 * GHCB, and the NMI handler itself is causing another #VC exception. In
	 * that case the GHCB content of the first handler needs to be backed up
	 * and restored.
	 */
	struct ghcb backup_ghcb;

	/*
	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
	 * There is no need for it to be atomic, because nothing is written to
	 * the GHCB between the read and the write of ghcb_active. So it is safe
	 * to use it when a nested #VC exception happens before the write.
	 *
	 * This is necessary for example in the #VC->NMI->#VC case when the NMI
	 * happens while the first #VC handler uses the GHCB. When the NMI code
	 * raises a second #VC handler it might overwrite the contents of the
	 * GHCB written by the first handler. To avoid this the content of the
	 * GHCB is saved and restored when the GHCB is detected to be in use
	 * already.
	 */
	bool ghcb_active;
	bool backup_ghcb_active;

	/*
	 * Cached DR7 value - write it on DR7 writes and return it on reads.
	 * That value will never make it to the real hardware DR7 as debugging
	 * is currently unsupported in SEV-ES guests.
	 */
	unsigned long dr7;
};

struct ghcb_state {
	struct ghcb *ghcb;
};

/* For early boot SVSM communication */
static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);

static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
static DEFINE_PER_CPU(u64, svsm_caa_pa);

static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
	unsigned long sp = regs->sp;

	/* User-mode RSP is not trusted */
	if (user_mode(regs))
		return false;

	/* SYSCALL gap still has user-mode RSP */
	if (ip_within_syscall_gap(regs))
		return false;

	return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
}

/*
 * This function handles the case when an NMI is raised in the #VC
 * exception handler entry code, before the #VC handler has switched off
 * its IST stack. In this case, the IST entry for #VC must be adjusted,
 * so that any nested #VC exception will not overwrite the stack
 * contents of the interrupted #VC handler.
 *
 * The IST entry is adjusted unconditionally so that it can also be
 * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
 * nested sev_es_ist_exit() call may adjust back the IST entry too
 * early.
 *
 * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
 * on the NMI IST stack, as they are only called from NMI handling code
 * right now.
 */
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
	unsigned long old_ist, new_ist;

	/* Read old IST entry */
	new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	/*
	 * If NMI happened while on the #VC IST stack, set the new IST
	 * value below regs->sp, so that the interrupted stack frame is
	 * not overwritten by subsequent #VC exceptions.
	 */
	if (on_vc_stack(regs))
		new_ist = regs->sp;

	/*
	 * Reserve additional 8 bytes and store old IST value so this
	 * adjustment can be unrolled in __sev_es_ist_exit().
	 */
	new_ist -= sizeof(old_ist);
	*(unsigned long *)new_ist = old_ist;

	/* Set new IST entry */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
}

void noinstr __sev_es_ist_exit(void)
{
	unsigned long ist;

	/* Read IST entry */
	ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
		return;

	/* Read back old IST entry and write it to the TSS */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
}

/*
 * Nothing shall interrupt this code path while holding the per-CPU
 * GHCB. The backup GHCB is only for NMIs interrupting this path.
 *
 * Callers must disable local interrupts around it.
 */
static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (unlikely(data->ghcb_active)) {
		/* GHCB is already in use - save its contents */

		if (unlikely(data->backup_ghcb_active)) {
			/*
			 * Backup-GHCB is also already in use. There is no way
			 * to continue here so just kill the machine. To make
			 * panic() work, mark GHCBs inactive so that messages
			 * can be printed out.
			 */
			data->ghcb_active = false;
			data->backup_ghcb_active = false;

			instrumentation_begin();
			panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
			instrumentation_end();
		}

		/* Mark backup_ghcb active before writing to it */
		data->backup_ghcb_active = true;

		state->ghcb = &data->backup_ghcb;

		/* Backup GHCB content */
		*state->ghcb = *ghcb;
	} else {
		state->ghcb = NULL;
		data->ghcb_active = true;
	}

	return ghcb;
}

static inline u64 sev_es_rd_ghcb_msr(void)
{
	return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}

static __always_inline void sev_es_wr_ghcb_msr(u64 val)
{
	u32 low, high;

	low = (u32)(val);
	high = (u32)(val >> 32);

	native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}

static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
				unsigned char *buffer)
{
	return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}

static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	int insn_bytes;

	insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
	if (insn_bytes == 0) {
		/* Nothing could be copied */
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
		ctxt->fi.cr2 = ctxt->regs->ip;
		return ES_EXCEPTION;
	} else if (insn_bytes == -EINVAL) {
		/* Effective RIP could not be calculated */
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		ctxt->fi.cr2 = 0;
		return ES_EXCEPTION;
	}

	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
		return ES_DECODE_FAILED;

	if (ctxt->insn.immediate.got)
		return ES_OK;
	else
		return ES_DECODE_FAILED;
}

static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	int res, ret;

	res = vc_fetch_insn_kernel(ctxt, buffer);
	if (res) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR;
		ctxt->fi.cr2 = ctxt->regs->ip;
		return ES_EXCEPTION;
	}

	ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
	if (ret < 0)
		return ES_DECODE_FAILED;
	else
		return ES_OK;
}

static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
{
	if (user_mode(ctxt->regs))
		return __vc_decode_user_insn(ctxt);
	else
		return __vc_decode_kern_insn(ctxt);
}

static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
				   char *dst, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;

	/*
	 * This function uses __put_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __put_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __put_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_to_user() here because
	 * vc_write_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *target = (u8 __user *)dst;

		memcpy(&d1, buf, 1);
		if (__put_user(d1, target))
			goto fault;
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *target = (u16 __user *)dst;

		memcpy(&d2, buf, 2);
		if (__put_user(d2, target))
			goto fault;
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *target = (u32 __user *)dst;

		memcpy(&d4, buf, 4);
		if (__put_user(d4, target))
			goto fault;
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *target = (u64 __user *)dst;

		memcpy(&d8, buf, 8);
		if (__put_user(d8, target))
			goto fault;
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)dst;

	return ES_EXCEPTION;
}

static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
				  char *src, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT;

	/*
	 * This function uses __get_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __get_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __get_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_from_user() here because
	 * vc_read_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *s = (u8 __user *)src;

		if (__get_user(d1, s))
			goto fault;
		memcpy(buf, &d1, 1);
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *s = (u16 __user *)src;

		if (__get_user(d2, s))
			goto fault;
		memcpy(buf, &d2, 2);
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *s = (u32 __user *)src;

		if (__get_user(d4, s))
			goto fault;
		memcpy(buf, &d4, 4);
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *s = (u64 __user *)src;

		if (__get_user(d8, s))
			goto fault;
		memcpy(buf, &d8, 8);
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)src;

	return ES_EXCEPTION;
}

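/*
 * Translate the faulting virtual address of an emulated MMIO access to a
 * physical address by walking the current page table. Encrypted mappings
 * are rejected because MMIO emulation is only valid for shared memory.
 */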
static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
					   unsigned long vaddr, phys_addr_t *paddr)
{
	unsigned long va = (unsigned long)vaddr;
	unsigned int level;
	phys_addr_t pa;
	pgd_t *pgd;
	pte_t *pte;

	pgd = __va(read_cr3_pa());
	pgd = &pgd[pgd_index(va)];
	pte = lookup_address_in_pgd(pgd, va, &level);
	if (!pte) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.cr2 = vaddr;
		ctxt->fi.error_code = 0;

		if (user_mode(ctxt->regs))
			ctxt->fi.error_code |= X86_PF_USER;

		return ES_EXCEPTION;
	}

	if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
		/* Emulated MMIO to/from encrypted memory not supported */
		return ES_UNSUPPORTED;

	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
	pa |= va & ~page_level_mask(level);

	*paddr = pa;

	return ES_OK;
}

static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
{
	BUG_ON(size > 4);

	if (user_mode(ctxt->regs)) {
		struct thread_struct *t = &current->thread;
		struct io_bitmap *iobm = t->io_bitmap;
		size_t idx;

		if (!iobm)
			goto fault;

		for (idx = port; idx < port + size; ++idx) {
			if (test_bit(idx, iobm->bitmap))
				goto fault;
		}
	}

	return ES_OK;

fault:
	ctxt->fi.vector = X86_TRAP_GP;
	ctxt->fi.error_code = 0;

	return ES_EXCEPTION;
}

static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
{
	long error_code = ctxt->fi.error_code;
	int trapnr = ctxt->fi.vector;

	ctxt->regs->orig_ax = ctxt->fi.error_code;

	switch (trapnr) {
	case X86_TRAP_GP:
		exc_general_protection(ctxt->regs, error_code);
		break;
	case X86_TRAP_UD:
		exc_invalid_op(ctxt->regs);
		break;
	case X86_TRAP_PF:
		write_cr2(ctxt->fi.cr2);
		exc_page_fault(ctxt->regs, error_code);
		break;
	case X86_TRAP_AC:
		exc_alignment_check(ctxt->regs, error_code);
		break;
	default:
		pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
		BUG();
	}
}

/* Include code shared with pre-decompression boot stage */
#include "shared.c"

static inline struct svsm_ca *svsm_get_caa(void)
{
	/*
	 * Use rIP-relative references when called early in the boot. If
	 * ->use_cas is set, then it is late in the boot and no need
	 * to worry about rIP-relative references.
	 */
	if (RIP_REL_REF(sev_cfg).use_cas)
		return this_cpu_read(svsm_caa);
	else
		return RIP_REL_REF(boot_svsm_caa);
}

static u64 svsm_get_caa_pa(void)
{
	/*
	 * Use rIP-relative references when called early in the boot. If
	 * ->use_cas is set, then it is late in the boot and no need
	 * to worry about rIP-relative references.
	 */
	if (RIP_REL_REF(sev_cfg).use_cas)
		return this_cpu_read(svsm_caa_pa);
	else
		return RIP_REL_REF(boot_svsm_caa_pa);
}

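/*
 * Counterpart to __sev_get_ghcb(): either restore the saved GHCB contents
 * from the backup page or invalidate the GHCB and mark it as free again.
 */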
static noinstr void __sev_put_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (state->ghcb) {
		/* Restore GHCB from Backup */
		*ghcb = *state->ghcb;
		data->backup_ghcb_active = false;
		state->ghcb = NULL;
	} else {
		/*
		 * Invalidate the GHCB so a VMGEXIT instruction issued
		 * from userspace won't appear to be valid.
		 */
		vc_ghcb_invalidate(ghcb);
		data->ghcb_active = false;
	}
}

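/*
 * Issue an SVSM protocol call, using the GHCB protocol once a GHCB is
 * available and falling back to the GHCB MSR protocol otherwise.
 */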
static int svsm_perform_call_protocol(struct svsm_call *call)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret;

	/*
	 * This can be called very early in the boot, use native functions in
	 * order to avoid paravirt issues.
	 */
	flags = native_local_irq_save();

	/*
	 * Use rip-relative references when called early in the boot. If
	 * ghcbs_initialized is set, then it is late in the boot and no need
	 * to worry about rip-relative references in called functions.
	 */
	if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
		ghcb = __sev_get_ghcb(&state);
	else if (RIP_REL_REF(boot_ghcb))
		ghcb = RIP_REL_REF(boot_ghcb);
	else
		ghcb = NULL;

	do {
		ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
			   : svsm_perform_msr_protocol(call);
	} while (ret == -EAGAIN);

	if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
		__sev_put_ghcb(&state);

	native_local_irq_restore(flags);

	return ret;
}

void noinstr __sev_es_nmi_complete(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
	VMGEXIT();

	__sev_put_ghcb(&state);
}

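/* Read the AP jump table address from the os_area of the SNP secrets page. */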
static u64 __init get_snp_jump_table_addr(void)
{
	struct snp_secrets_page *secrets;
	void __iomem *mem;
	u64 addr;

	mem = ioremap_encrypted(secrets_pa, PAGE_SIZE);
	if (!mem) {
		pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
		return 0;
	}

	secrets = (__force struct snp_secrets_page *)mem;

	addr = secrets->os_area.ap_jump_table_pa;
	iounmap(mem);

	return addr;
}

static u64 __init get_jump_table_addr(void)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	u64 ret = 0;

	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return get_snp_jump_table_addr();

	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
	    ghcb_sw_exit_info_2_is_valid(ghcb))
		ret = ghcb->save.sw_exit_info_2;

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}

static void __head
early_set_pages_state(unsigned long vaddr, unsigned long paddr,
		      unsigned long npages, enum psc_op op)
{
	unsigned long paddr_end;
	u64 val;

	vaddr = vaddr & PAGE_MASK;

	paddr = paddr & PAGE_MASK;
	paddr_end = paddr + (npages << PAGE_SHIFT);

	while (paddr < paddr_end) {
		/* Page validation must be rescinded before changing to shared */
		if (op == SNP_PAGE_STATE_SHARED)
			pvalidate_4k_page(vaddr, paddr, false);

		/*
		 * Use the MSR protocol because this function can be called before
		 * the GHCB is established.
		 */
		sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
		VMGEXIT();

		val = sev_es_rd_ghcb_msr();

		if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
			goto e_term;

		if (GHCB_MSR_PSC_RESP_VAL(val))
			goto e_term;

		/* Page validation must be performed after changing to private */
		if (op == SNP_PAGE_STATE_PRIVATE)
			pvalidate_4k_page(vaddr, paddr, true);

		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	return;

e_term:
	sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
}

void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
					 unsigned long npages)
{
	/*
	 * This can be invoked in early boot while running identity mapped, so
	 * use an open coded check for SNP instead of using cc_platform_has().
	 * This eliminates worries about jump tables or checking boot_cpu_data
	 * in the cc_platform_has() function.
	 */
	if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
		return;

	/*
	 * Ask the hypervisor to mark the memory pages as private in the RMP
	 * table.
	 */
	early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
}

void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
					unsigned long npages)
{
	/*
	 * This can be invoked in early boot while running identity mapped, so
	 * use an open coded check for SNP instead of using cc_platform_has().
	 * This eliminates worries about jump tables or checking boot_cpu_data
	 * in the cc_platform_has() function.
	 */
	if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
		return;

	/* Ask hypervisor to mark the memory pages shared in the RMP table. */
	early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
}

static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
				       unsigned long vaddr_end, int op)
{
	struct ghcb_state state;
	bool use_large_entry;
	struct psc_hdr *hdr;
	struct psc_entry *e;
	unsigned long flags;
	unsigned long pfn;
	struct ghcb *ghcb;
	int i;

	hdr = &data->hdr;
	e = data->entries;

	memset(data, 0, sizeof(*data));
	i = 0;

	while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
		hdr->end_entry = i;

		if (is_vmalloc_addr((void *)vaddr)) {
			pfn = vmalloc_to_pfn((void *)vaddr);
			use_large_entry = false;
		} else {
			pfn = __pa(vaddr) >> PAGE_SHIFT;
			use_large_entry = true;
		}

		e->gfn = pfn;
		e->operation = op;

		if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
		    (vaddr_end - vaddr) >= PMD_SIZE) {
			e->pagesize = RMP_PG_SIZE_2M;
			vaddr += PMD_SIZE;
		} else {
			e->pagesize = RMP_PG_SIZE_4K;
			vaddr += PAGE_SIZE;
		}

		e++;
		i++;
	}

	/* Page validation must be rescinded before changing to shared */
	if (op == SNP_PAGE_STATE_SHARED)
		pvalidate_pages(data);

	local_irq_save(flags);

	if (sev_cfg.ghcbs_initialized)
		ghcb = __sev_get_ghcb(&state);
	else
		ghcb = boot_ghcb;

	/* Invoke the hypervisor to perform the page state changes */
	if (!ghcb || vmgexit_psc(ghcb, data))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);

	if (sev_cfg.ghcbs_initialized)
		__sev_put_ghcb(&state);

	local_irq_restore(flags);

	/* Page validation must be performed after changing to private */
	if (op == SNP_PAGE_STATE_PRIVATE)
		pvalidate_pages(data);

	return vaddr;
}

static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
{
	struct snp_psc_desc desc;
	unsigned long vaddr_end;

	/* Use the MSR protocol when a GHCB is not available. */
	if (!boot_ghcb)
		return early_set_pages_state(vaddr, __pa(vaddr), npages, op);

	vaddr = vaddr & PAGE_MASK;
	vaddr_end = vaddr + (npages << PAGE_SHIFT);

	while (vaddr < vaddr_end)
		vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
}

void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
}

void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}

void snp_accept_memory(phys_addr_t start, phys_addr_t end)
{
	unsigned long vaddr, npages;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	vaddr = (unsigned long)__va(start);
	npages = (end - start) >> PAGE_SHIFT;

	set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}

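/*
 * Request an AP CREATE or DESTROY event for the vCPU identified by APIC ID,
 * passing the GPA of its VMSA page in sw_exit_info_2.
 */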
static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
{
	bool create = event != SVM_VMGEXIT_AP_DESTROY;
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret = 0;

	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);

	if (create)
		ghcb_set_rax(ghcb, vmsa->sev_features);

	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
	ghcb_set_sw_exit_info_1(ghcb,
				((u64)apic_id << 32)	|
				((u64)snp_vmpl << 16)	|
				event);
	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
		pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
		ret = -EINVAL;
	}

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}

static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
{
	int ret;

	if (snp_vmpl) {
		struct svsm_call call = {};
		unsigned long flags;

		local_irq_save(flags);

		call.caa = this_cpu_read(svsm_caa);
		call.rcx = __pa(va);

		if (make_vmsa) {
			/* Protocol 0, Call ID 2 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
			call.rdx = __pa(caa);
			call.r8 = apic_id;
		} else {
			/* Protocol 0, Call ID 3 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
		}

		ret = svsm_perform_call_protocol(&call);

		local_irq_restore(flags);
	} else {
		/*
		 * If the kernel runs at VMPL0, it can change the VMSA
		 * bit for a page using the RMPADJUST instruction.
		 * However, for the instruction to succeed it must
		 * target the permissions of a lesser privileged (higher
		 * numbered) VMPL level, so use VMPL1.
		 */
		u64 attrs = 1;

		if (make_vmsa)
			attrs |= RMPADJUST_VMSA_PAGE_BIT;

		ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
	}

	return ret;
}

static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
{
	int err;

	err = snp_set_vmsa(vmsa, NULL, apic_id, false);
	if (err)
		pr_err("clear VMSA page failed (%u), leaking page\n", err);
	else
		free_page((unsigned long)vmsa);
}

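/* Switch a kernel mapping back to encrypted by rewriting its PTE. */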
static void set_pte_enc(pte_t *kpte, int level, void *va)
{
	struct pte_enc_desc d = {
		.kpte = kpte,
		.pte_level = level,
		.va = va,
		.encrypt = true
	};

	prepare_pte_enc(&d);
	set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
}

static void unshare_all_memory(void)
{
	unsigned long addr, end, size, ghcb;
	struct sev_es_runtime_data *data;
	unsigned int npages, level;
	bool skipped_addr;
	pte_t *pte;
	int cpu;

	/* Unshare the direct mapping. */
	addr = PAGE_OFFSET;
	end = PAGE_OFFSET + get_max_mapped();

	while (addr < end) {
		pte = lookup_address(addr, &level);
		size = page_level_size(level);
		npages = size / PAGE_SIZE;
		skipped_addr = false;

		if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
			addr += size;
			continue;
		}

		/*
		 * Ensure that all the per-CPU GHCBs are made private at the
		 * end of the unsharing loop so that the switch to the slower
		 * MSR protocol happens last.
		 */
		for_each_possible_cpu(cpu) {
			data = per_cpu(runtime_data, cpu);
			ghcb = (unsigned long)&data->ghcb_page;

			/* Handle the case of a huge page containing the GHCB page */
			if (addr <= ghcb && ghcb < addr + size) {
				skipped_addr = true;
				break;
			}
		}

		if (!skipped_addr) {
			set_pte_enc(pte, level, (void *)addr);
			snp_set_memory_private(addr, npages);
		}
		addr += size;
	}

	/* Unshare all bss decrypted memory. */
	addr = (unsigned long)__start_bss_decrypted;
	end = (unsigned long)__start_bss_decrypted_unused;
	npages = (end - addr) >> PAGE_SHIFT;

	for (; addr < end; addr += PAGE_SIZE) {
		pte = lookup_address(addr, &level);
		if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
			continue;

		set_pte_enc(pte, level, (void *)addr);
	}
	addr = (unsigned long)__start_bss_decrypted;
	snp_set_memory_private(addr, npages);

	__flush_tlb_all();
}

/* Stop new private<->shared conversions */
void snp_kexec_begin(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	/*
	 * Crash kernel ends up here with interrupts disabled: can't wait for
	 * conversions to finish.
	 *
	 * If race happened, just report and proceed.
	 */
	if (!set_memory_enc_stop_conversion())
		pr_warn("Failed to stop shared<->private conversions\n");
}

/*
 * Shut down all APs except the one handling kexec/kdump and clear the VMSA
 * tag on the APs' VMSA pages as they are not being used as VMSA pages
 * anymore.
 */
static void shutdown_all_aps(void)
{
	struct sev_es_save_area *vmsa;
	int apic_id, this_cpu, cpu;

	this_cpu = get_cpu();

	/*
	 * APs are already in HLT loop when enc_kexec_finish() callback
	 * is invoked.
	 */
	for_each_present_cpu(cpu) {
		vmsa = per_cpu(sev_vmsa, cpu);

		/*
		 * The BSP or offlined APs do not have guest allocated VMSA
		 * and there is no need to clear the VMSA tag for this page.
		 */
		if (!vmsa)
			continue;

		/*
		 * Cannot clear the VMSA tag for the currently running vCPU.
		 */
		if (this_cpu == cpu) {
			unsigned long pa;
			struct page *p;

			pa = __pa(vmsa);
			/*
			 * Mark the VMSA page of the running vCPU as offline
			 * so that it is excluded and not touched by makedumpfile
			 * while generating vmcore during kdump.
			 */
			p = pfn_to_online_page(pa >> PAGE_SHIFT);
			if (p)
				__SetPageOffline(p);
			continue;
		}

		apic_id = cpuid_to_apicid[cpu];

		/*
		 * Issue AP destroy to ensure AP gets kicked out of guest mode
		 * to allow using RMPADJUST to remove the VMSA tag on its
		 * VMSA page.
		 */
		vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
		snp_cleanup_vmsa(vmsa, apic_id);
	}

	put_cpu();
}

void snp_kexec_finish(void)
{
	struct sev_es_runtime_data *data;
	unsigned long size, addr;
	unsigned int level, cpu;
	struct ghcb *ghcb;
	pte_t *pte;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	shutdown_all_aps();

	unshare_all_memory();

	/*
	 * Switch to using the MSR protocol to change per-CPU GHCBs to
	 * private. All the per-CPU GHCBs have been switched back to private,
	 * so can't do any more GHCB calls to the hypervisor beyond this point
	 * until the kexec'ed kernel starts running.
	 */
	boot_ghcb = NULL;
	sev_cfg.ghcbs_initialized = false;

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);
		ghcb = &data->ghcb_page;
		pte = lookup_address((unsigned long)ghcb, &level);
		size = page_level_size(level);
		/* Handle the case of a huge page containing the GHCB page */
		addr = (unsigned long)ghcb & page_level_mask(level);
		set_pte_enc(pte, level, (void *)addr);
		snp_set_memory_private(addr, (size / PAGE_SIZE));
	}
}

#define __ATTR_BASE		(SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
#define INIT_CS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
#define INIT_DS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)

#define INIT_LDTR_ATTRIBS	(SVM_SELECTOR_P_MASK | 2)
#define INIT_TR_ATTRIBS		(SVM_SELECTOR_P_MASK | 3)

static void *snp_alloc_vmsa_page(int cpu)
{
	struct page *p;

	/*
	 * Allocate VMSA page to work around the SNP erratum where the CPU will
	 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
	 * collides with the RMP entry of VMSA page. The recommended workaround
	 * is to not use a large page.
	 *
	 * Allocate an 8k page which is also 8k-aligned.
	 */
	p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
	if (!p)
		return NULL;

	split_page(p, 1);

	/* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
	__free_page(p);

	return page_address(p + 1);
}

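/*
 * Bring up an AP by building a new VMSA that mirrors the architectural
 * INIT state and handing it to the hypervisor via the AP Creation NAE
 * event.
 */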
static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
{
	struct sev_es_save_area *cur_vmsa, *vmsa;
	struct svsm_ca *caa;
	u8 sipi_vector;
	int cpu, ret;
	u64 cr4;

	/*
	 * The hypervisor SNP feature support check has happened earlier, just check
	 * the AP_CREATION one here.
	 */
	if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
		return -EOPNOTSUPP;

	/*
	 * Verify the desired start IP against the known trampoline start IP
	 * to catch any future new trampolines that may be introduced that
	 * would require a new protected guest entry point.
	 */
	if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
		      "Unsupported SNP start_ip: %lx\n", start_ip))
		return -EINVAL;

	/* Override start_ip with known protected guest start IP */
	start_ip = real_mode_header->sev_es_trampoline_start;

	/* Find the logical CPU for the APIC ID */
	for_each_present_cpu(cpu) {
		if (arch_match_cpu_phys_id(cpu, apic_id))
			break;
	}
	if (cpu >= nr_cpu_ids)
		return -EINVAL;

	cur_vmsa = per_cpu(sev_vmsa, cpu);

	/*
	 * A new VMSA is created each time because there is no guarantee that
	 * the current VMSA is the kernel's or that the vCPU is not running. If
	 * an attempt was done to use the current VMSA with a running vCPU, a
	 * #VMEXIT of that vCPU would wipe out all of the settings being done
	 * here.
	 */
	vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu);
	if (!vmsa)
		return -ENOMEM;

	/* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
	caa = per_cpu(svsm_caa, cpu);

	/* CR4 should maintain the MCE value */
	cr4 = native_read_cr4() & X86_CR4_MCE;

	/* Set the CS value based on the start_ip converted to a SIPI vector */
	sipi_vector = (start_ip >> 12);
	vmsa->cs.base = sipi_vector << 12;
	vmsa->cs.limit = AP_INIT_CS_LIMIT;
	vmsa->cs.attrib = INIT_CS_ATTRIBS;
	vmsa->cs.selector = sipi_vector << 8;

	/* Set the RIP value based on start_ip */
	vmsa->rip = start_ip & 0xfff;

	/* Set AP INIT defaults as documented in the APM */
	vmsa->ds.limit = AP_INIT_DS_LIMIT;
	vmsa->ds.attrib = INIT_DS_ATTRIBS;
	vmsa->es = vmsa->ds;
	vmsa->fs = vmsa->ds;
	vmsa->gs = vmsa->ds;
	vmsa->ss = vmsa->ds;

	vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT;
	vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT;
	vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
	vmsa->idtr.limit = AP_INIT_IDTR_LIMIT;
	vmsa->tr.limit = AP_INIT_TR_LIMIT;
	vmsa->tr.attrib = INIT_TR_ATTRIBS;

	vmsa->cr4 = cr4;
	vmsa->cr0 = AP_INIT_CR0_DEFAULT;
	vmsa->dr7 = DR7_RESET_VALUE;
	vmsa->dr6 = AP_INIT_DR6_DEFAULT;
	vmsa->rflags = AP_INIT_RFLAGS_DEFAULT;
	vmsa->g_pat = AP_INIT_GPAT_DEFAULT;
	vmsa->xcr0 = AP_INIT_XCR0_DEFAULT;
	vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT;
	vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
	vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;

	/* SVME must be set. */
	vmsa->efer = EFER_SVME;

	/*
	 * Set the SNP-specific fields for this VMSA:
	 *   VMPL level
	 *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
	 */
	vmsa->vmpl = snp_vmpl;
	vmsa->sev_features = sev_status >> 2;

	/* Populate AP's TSC scale/offset to get accurate TSC values. */
	if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) {
		vmsa->tsc_scale = snp_tsc_scale;
		vmsa->tsc_offset = snp_tsc_offset;
	}

	/* Switch the page over to a VMSA page now that it is initialized */
	ret = snp_set_vmsa(vmsa, caa, apic_id, true);
	if (ret) {
		pr_err("set VMSA page failed (%u)\n", ret);
		free_page((unsigned long)vmsa);

		return -EINVAL;
	}

	/* Issue VMGEXIT AP Creation NAE event */
	ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
	if (ret) {
		snp_cleanup_vmsa(vmsa, apic_id);
		vmsa = NULL;
	}

	/* Free up any previous VMSA page */
	if (cur_vmsa)
		snp_cleanup_vmsa(cur_vmsa, apic_id);

	/* Record the current VMSA page */
	per_cpu(sev_vmsa, cpu) = vmsa;

	return ret;
}

void __init snp_set_wakeup_secondary_cpu(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	/*
	 * Always set this override if SNP is enabled. This makes it the
	 * required method to start APs under SNP. If the hypervisor does
	 * not support AP creation, then no APs will be started.
	 */
	apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
}

int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
{
	u16 startup_cs, startup_ip;
	phys_addr_t jump_table_pa;
	u64 jump_table_addr;
	u16 __iomem *jump_table;

	jump_table_addr = get_jump_table_addr();

	/* On UP guests there is no jump table so this is not a failure */
	if (!jump_table_addr)
		return 0;

	/* Check if AP Jump Table is page-aligned */
	if (jump_table_addr & ~PAGE_MASK)
		return -EINVAL;

	jump_table_pa = jump_table_addr & PAGE_MASK;

	startup_cs = (u16)(rmh->trampoline_start >> 4);
	startup_ip = (u16)(rmh->sev_es_trampoline_start -
			   rmh->trampoline_start);

	jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
	if (!jump_table)
		return -EIO;

	writew(startup_ip, &jump_table[0]);
	writew(startup_cs, &jump_table[1]);

	iounmap(jump_table);

	return 0;
}

/*
 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
 */
int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
{
	struct sev_es_runtime_data *data;
	unsigned long address, pflags;
	int cpu;
	u64 pfn;

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return 0;

	pflags = _PAGE_NX | _PAGE_RW;

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);

		address = __pa(&data->ghcb_page);
		pfn = address >> PAGE_SHIFT;

		if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
			return 1;
	}

	return 0;
}

/* Writes to the SVSM CAA MSR are ignored */
static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write)
{
	if (write)
		return ES_OK;

	regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
	regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));

	return ES_OK;
}

/*
 * TSC related accesses should not exit to the hypervisor when a guest is
 * executing with Secure TSC enabled, so special handling is required for
 * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ.
 */
static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write)
{
	u64 tsc;

	/*
	 * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled.
	 * Terminate the SNP guest when the interception is enabled.
	 */
	if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ)
		return ES_VMM_ERROR;

	/*
	 * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC
	 * to return undefined values, so ignore all writes.
	 *
	 * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use
	 * the value returned by rdtsc_ordered().
	 */
	if (write) {
		WARN_ONCE(1, "TSC MSR writes are verboten!\n");
		return ES_OK;
	}

	tsc = rdtsc_ordered();
	regs->ax = lower_32_bits(tsc);
	regs->dx = upper_32_bits(tsc);

	return ES_OK;
}

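/*
 * Emulate RDMSR/WRMSR: most MSR accesses are forwarded to the hypervisor
 * through an SVM_EXIT_MSR VMGEXIT, while the SVSM CAA MSR and, with Secure
 * TSC enabled, the TSC-related MSRs are handled entirely within the guest.
 */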
static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	enum es_result ret;
	bool write;

	/* Is it a WRMSR? */
	write = ctxt->insn.opcode.bytes[1] == 0x30;

	switch (regs->cx) {
	case MSR_SVSM_CAA:
		return __vc_handle_msr_caa(regs, write);
	case MSR_IA32_TSC:
	case MSR_AMD64_GUEST_TSC_FREQ:
		if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
			return __vc_handle_secure_tsc_msrs(regs, write);
		break;
	default:
		break;
	}

	ghcb_set_rcx(ghcb, regs->cx);
	if (write) {
		ghcb_set_rax(ghcb, regs->ax);
		ghcb_set_rdx(ghcb, regs->dx);
	}

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0);

	if ((ret == ES_OK) && !write) {
		regs->ax = ghcb->save.rax;
		regs->dx = ghcb->save.rdx;
	}

	return ret;
}

static void snp_register_per_cpu_ghcb(void)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	snp_register_ghcb_early(__pa(ghcb));
}

void setup_ghcb(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	/*
	 * Check whether the runtime #VC exception handler is active. It uses
	 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
	 *
	 * If SNP is active, register the per-CPU GHCB page so that the runtime
	 * exception handler can use it.
	 */
	if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
			snp_register_per_cpu_ghcb();

		sev_cfg.ghcbs_initialized = true;

		return;
	}

	/*
	 * Make sure the hypervisor talks a supported protocol.
	 * This gets called only in the BSP boot phase.
	 */
	if (!sev_es_negotiate_protocol())
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);

	/*
	 * Clear the boot_ghcb. The first exception comes in before the bss
	 * section is cleared.
	 */
	memset(&boot_ghcb_page, 0, PAGE_SIZE);

	/* Alright - Make the boot-ghcb public */
	boot_ghcb = &boot_ghcb_page;

	/* SNP guest requires that GHCB GPA must be registered. */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		snp_register_ghcb_early(__pa(&boot_ghcb_page));
}

#ifdef CONFIG_HOTPLUG_CPU
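/*
 * Park the vCPU in the hypervisor via the AP HLT LOOP NAE event until a
 * wakeup signal is received.
 */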
static void sev_es_ap_hlt_loop(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	while (true) {
		vc_ghcb_invalidate(ghcb);
		ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
		ghcb_set_sw_exit_info_1(ghcb, 0);
		ghcb_set_sw_exit_info_2(ghcb, 0);

		sev_es_wr_ghcb_msr(__pa(ghcb));
		VMGEXIT();

		/* Wakeup signal? */
		if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
		    ghcb->save.sw_exit_info_2)
			break;
	}

	__sev_put_ghcb(&state);
}

/*
 * Play_dead handler when running under SEV-ES. This is needed because
 * the hypervisor can't deliver an SIPI request to restart the AP.
 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
 * hypervisor wakes it up again.
 */
static void sev_es_play_dead(void)
{
	play_dead_common();

	/* IRQs now disabled */

	sev_es_ap_hlt_loop();

	/*
	 * If we get here, the VCPU was woken up again. Jump to CPU
	 * startup code to get it back online.
	 */
	soft_restart_cpu();
}
#else  /* CONFIG_HOTPLUG_CPU */
#define sev_es_play_dead	native_play_dead
#endif /* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_SMP
static void __init sev_es_setup_play_dead(void)
{
	smp_ops.play_dead = sev_es_play_dead;
}
#else
static inline void sev_es_setup_play_dead(void) { }
#endif

static void __init alloc_runtime_data(int cpu)
{
	struct sev_es_runtime_data *data;

	data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
	if (!data)
		panic("Can't allocate SEV-ES runtime data");

	per_cpu(runtime_data, cpu) = data;

	if (snp_vmpl) {
		struct svsm_ca *caa;

		/* Allocate the SVSM CA page if an SVSM is present */
		caa = memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE);

		per_cpu(svsm_caa, cpu) = caa;
		per_cpu(svsm_caa_pa, cpu) = __pa(caa);
	}
}

static void __init init_ghcb(int cpu)
{
	struct sev_es_runtime_data *data;
	int err;

	data = per_cpu(runtime_data, cpu);

	err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
					 sizeof(data->ghcb_page));
	if (err)
		panic("Can't map GHCBs unencrypted");

	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));

	data->ghcb_active = false;
	data->backup_ghcb_active = false;
}

void __init sev_es_init_vc_handling(void)
{
	int cpu;

	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	if (!sev_es_check_cpu_features())
		panic("SEV-ES CPU Features missing");

	/*
	 * SNP is supported in v2 of the GHCB spec which mandates support for HV
	 * features.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
		sev_hv_features = get_hv_features();

		if (!(sev_hv_features & GHCB_HV_FT_SNP))
			sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
	}

	/* Initialize per-cpu GHCB pages */
	for_each_possible_cpu(cpu) {
		alloc_runtime_data(cpu);
		init_ghcb(cpu);
	}

	/* If running under an SVSM, switch to the per-cpu CA */
	if (snp_vmpl) {
		struct svsm_call call = {};
		unsigned long flags;
		int ret;

		local_irq_save(flags);

		/*
		 * SVSM_CORE_REMAP_CA call:
		 *   RAX = 0 (Protocol=0, CallID=0)
		 *   RCX = New CA GPA
		 */
		call.caa = svsm_get_caa();
		call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
		call.rcx = this_cpu_read(svsm_caa_pa);
		ret = svsm_perform_call_protocol(&call);
		if (ret)
			panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n",
			      ret, call.rax_out);

		sev_cfg.use_cas = true;

		local_irq_restore(flags);
	}

	sev_es_setup_play_dead();

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}

static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
	int trapnr = ctxt->fi.vector;

	if (trapnr == X86_TRAP_PF)
		native_write_cr2(ctxt->fi.cr2);

	ctxt->regs->orig_ax = ctxt->fi.error_code;
	do_early_exception(ctxt->regs, trapnr);
}

static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
{
	long *reg_array;
	int offset;

	reg_array = (long *)ctxt->regs;
	offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);

	if (offset < 0)
		return NULL;

	offset /= sizeof(long);

	return reg_array + offset;
}
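
/*
 * Perform a single MMIO read or write through the GHCB shared buffer. The
 * faulting virtual address is translated to a physical address first since
 * the hypervisor works with guest physical addresses.
 */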
static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 unsigned int bytes, bool read)
{
	u64 exit_code, exit_info_1, exit_info_2;
	unsigned long ghcb_pa = __pa(ghcb);
	enum es_result res;
	phys_addr_t paddr;
	void __user *ref;

	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
	if (ref == (void __user *)-1L)
		return ES_UNSUPPORTED;

	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;

	res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
	if (res != ES_OK) {
		if (res == ES_EXCEPTION && !read)
			ctxt->fi.error_code |= X86_PF_WRITE;

		return res;
	}

	exit_info_1 = paddr;
	/* Can never be greater than 8 */
	exit_info_2 = bytes;

	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));

	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}

/*
 * The MOVS instruction has two memory operands, which raises the
 * problem that it is not known whether the access to the source or the
 * destination caused the #VC exception (and hence whether an MMIO read
 * or write operation needs to be emulated).
 *
 * Instead of playing games with walking page-tables and trying to guess
 * whether the source or destination is an MMIO range, split the move
 * into two operations, a read and a write with only one memory operand.
 * This will cause a nested #VC exception on the MMIO address which can
 * then be handled.
 *
 * This implementation has the benefit that it also supports MOVS where
 * source _and_ destination are MMIO regions.
 *
 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
 * rare operation. If it turns out to be a performance problem the split
 * operations can be moved to memcpy_fromio() and memcpy_toio().
 */
static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
					  unsigned int bytes)
{
	unsigned long ds_base, es_base;
	unsigned char *src, *dst;
	unsigned char buffer[8];
	enum es_result ret;
	bool rep;
	int off;

	ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
	es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

	if (ds_base == -1L || es_base == -1L) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	src = ds_base + (unsigned char *)ctxt->regs->si;
	dst = es_base + (unsigned char *)ctxt->regs->di;

	ret = vc_read_mem(ctxt, src, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	ret = vc_write_mem(ctxt, dst, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	if (ctxt->regs->flags & X86_EFLAGS_DF)
		off = -bytes;
	else
		off = bytes;

	ctxt->regs->si += off;
	ctxt->regs->di += off;

	rep = insn_has_rep_prefix(&ctxt->insn);
	if (rep)
		ctxt->regs->cx -= 1;

	if (!rep || ctxt->regs->cx == 0)
		return ES_OK;
	else
		return ES_RETRY;
}

static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct insn *insn = &ctxt->insn;
	enum insn_mmio_type mmio;
	unsigned int bytes = 0;
	enum es_result ret;
	u8 sign_byte;
	long *reg_data;

	mmio = insn_decode_mmio(insn, &bytes);
	if (mmio == INSN_MMIO_DECODE_FAILED)
		return ES_DECODE_FAILED;

	if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
		reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
		if (!reg_data)
			return ES_DECODE_FAILED;
	}

	if (user_mode(ctxt->regs))
		return ES_UNSUPPORTED;

	switch (mmio) {
	case INSN_MMIO_WRITE:
		memcpy(ghcb->shared_buffer, reg_data, bytes);
		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;
	case INSN_MMIO_WRITE_IMM:
		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;
	case INSN_MMIO_READ:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero-extend for 32-bit operation */
		if (bytes == 4)
			*reg_data = 0;

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_READ_ZERO_EXTEND:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero extend based on operand size */
		memset(reg_data, 0, insn->opnd_bytes);
		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_READ_SIGN_EXTEND:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		if (bytes == 1) {
			u8 *val = (u8 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x80) ? 0xff : 0x00;
		} else {
			u16 *val = (u16 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
		}

		/* Sign extend based on operand size */
		memset(reg_data, sign_byte, insn->opnd_bytes);
		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_MOVS:
		ret = vc_handle_mmio_movs(ctxt, bytes);
		break;
	default:
		ret = ES_UNSUPPORTED;
		break;
	}

	return ret;
}

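/*
 * Emulate a DR7 write by caching the value: hardware debugging is not
 * supported in SEV-ES guests. With the DebugSwap feature active, debug
 * register accesses are not expected to be intercepted at all, so treat
 * such a #VC as a VMM error.
 */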
static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
					  struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long val, *reg = vc_insn_get_rm(ctxt);
	enum es_result ret;

	if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
		return ES_VMM_ERROR;

	if (!reg)
		return ES_DECODE_FAILED;

	val = *reg;

	/* Upper 32 bits must be written as zeroes */
	if (val >> 32) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	/* Clear out other reserved bits and set bit 10 */
	val = (val & 0xffff23ffL) | BIT(10);

	/* Early non-zero writes to DR7 are not supported */
	if (!data && (val & ~DR7_RESET_VALUE))
		return ES_UNSUPPORTED;

	/* Using a value of 0 for ExitInfo1 means RAX holds the value */
	ghcb_set_rax(ghcb, val);
	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (data)
		data->dr7 = val;

	return ES_OK;
}

static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
					 struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long *reg = vc_insn_get_rm(ctxt);

	if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
		return ES_VMM_ERROR;

	if (!reg)
		return ES_DECODE_FAILED;

	if (data)
		*reg = data->dr7;
	else
		*reg = DR7_RESET_VALUE;

	return ES_OK;
}

static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
				       struct es_em_ctxt *ctxt)
{
	return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
}

static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rcx(ghcb, ctxt->regs->cx);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;
	ctxt->regs->dx = ghcb->save.rdx;

	return ES_OK;
}

static enum es_result vc_handle_monitor(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Treat it as a NOP and do not leak a physical address to the
	 * hypervisor.
	 */
	return ES_OK;
}

static enum es_result vc_handle_mwait(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt)
{
	/* Treat the same as MONITOR/MONITORX */
	return ES_OK;
}

static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rax(ghcb, ctxt->regs->ax);
	ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);

	if (x86_platform.hyper.sev_es_hcall_prepare)
		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!ghcb_rax_is_valid(ghcb))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;

	/*
	 * Call sev_es_hcall_finish() after regs->ax is already set.
	 * This allows the hypervisor handler to overwrite it again if
	 * necessary.
	 */
	if (x86_platform.hyper.sev_es_hcall_finish &&
	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
		return ES_VMM_ERROR;

	return ES_OK;
}

static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Calling exc_alignment_check() directly does not work, because it
	 * enables IRQs and the GHCB is active. Forward the exception and call
	 * it later from vc_forward_exception().
	 */
	ctxt->fi.vector = X86_TRAP_AC;
	ctxt->fi.error_code = 0;
	return ES_EXCEPTION;
}

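/*
 * Central #VC exit-code dispatcher: after validating the opcode bytes
 * against the exit code, hand the event off to the matching emulation
 * handler.
 */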
						3 : 0);

	if (x86_platform.hyper.sev_es_hcall_prepare)
		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!ghcb_rax_is_valid(ghcb))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;

	/*
	 * Call sev_es_hcall_finish() after regs->ax is already set.
	 * This allows the hypervisor handler to overwrite it again if
	 * necessary.
	 */
	if (x86_platform.hyper.sev_es_hcall_finish &&
	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
		return ES_VMM_ERROR;

	return ES_OK;
}

static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Calling exc_alignment_check() directly does not work, because it
	 * enables IRQs and the GHCB is active. Forward the exception and call
	 * it later from vc_forward_exception().
	 */
	ctxt->fi.vector = X86_TRAP_AC;
	ctxt->fi.error_code = 0;
	return ES_EXCEPTION;
}

static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
					 struct ghcb *ghcb,
					 unsigned long exit_code)
{
	enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);

	if (result != ES_OK)
		return result;

	switch (exit_code) {
	case SVM_EXIT_READ_DR7:
		result = vc_handle_dr7_read(ghcb, ctxt);
		break;
	case SVM_EXIT_WRITE_DR7:
		result = vc_handle_dr7_write(ghcb, ctxt);
		break;
	case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
		result = vc_handle_trap_ac(ghcb, ctxt);
		break;
	case SVM_EXIT_RDTSC:
	case SVM_EXIT_RDTSCP:
		result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
		break;
	case SVM_EXIT_RDPMC:
		result = vc_handle_rdpmc(ghcb, ctxt);
		break;
	case SVM_EXIT_INVD:
		pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
		result = ES_UNSUPPORTED;
		break;
	case SVM_EXIT_CPUID:
		result = vc_handle_cpuid(ghcb, ctxt);
		break;
	case SVM_EXIT_IOIO:
		result = vc_handle_ioio(ghcb, ctxt);
		break;
	case SVM_EXIT_MSR:
		result = vc_handle_msr(ghcb, ctxt);
		break;
	case SVM_EXIT_VMMCALL:
		result = vc_handle_vmmcall(ghcb, ctxt);
		break;
	case SVM_EXIT_WBINVD:
		result = vc_handle_wbinvd(ghcb, ctxt);
		break;
	case SVM_EXIT_MONITOR:
		result = vc_handle_monitor(ghcb, ctxt);
		break;
	case SVM_EXIT_MWAIT:
		result = vc_handle_mwait(ghcb, ctxt);
		break;
	case SVM_EXIT_NPF:
		result = vc_handle_mmio(ghcb, ctxt);
		break;
	default:
		/*
		 * Unexpected #VC exception
		 */
		result = ES_UNSUPPORTED;
	}

	return result;
}

static __always_inline bool is_vc2_stack(unsigned long sp)
{
	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
}

static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
{
	unsigned long sp, prev_sp;

	sp = (unsigned long)regs;
	prev_sp = regs->sp;

	/*
	 * If the code was already executing on the VC2 stack when the #VC
	 * happened, let it proceed to the normal handling routine. This way the
	 * code executing on the VC2 stack can cause #VC exceptions to get handled.
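 *
 * In the opposite case, when the handler finds itself on the VC2 stack while
 * the interrupted context was not running on it, the nesting is unexpected
 * and the context is reported as invalid.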
2225 */ 2226 return is_vc2_stack(sp) && !is_vc2_stack(prev_sp); 2227 } 2228 2229 static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) 2230 { 2231 struct ghcb_state state; 2232 struct es_em_ctxt ctxt; 2233 enum es_result result; 2234 struct ghcb *ghcb; 2235 bool ret = true; 2236 2237 ghcb = __sev_get_ghcb(&state); 2238 2239 vc_ghcb_invalidate(ghcb); 2240 result = vc_init_em_ctxt(&ctxt, regs, error_code); 2241 2242 if (result == ES_OK) 2243 result = vc_handle_exitcode(&ctxt, ghcb, error_code); 2244 2245 __sev_put_ghcb(&state); 2246 2247 /* Done - now check the result */ 2248 switch (result) { 2249 case ES_OK: 2250 vc_finish_insn(&ctxt); 2251 break; 2252 case ES_UNSUPPORTED: 2253 pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n", 2254 error_code, regs->ip); 2255 ret = false; 2256 break; 2257 case ES_VMM_ERROR: 2258 pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", 2259 error_code, regs->ip); 2260 ret = false; 2261 break; 2262 case ES_DECODE_FAILED: 2263 pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", 2264 error_code, regs->ip); 2265 ret = false; 2266 break; 2267 case ES_EXCEPTION: 2268 vc_forward_exception(&ctxt); 2269 break; 2270 case ES_RETRY: 2271 /* Nothing to do */ 2272 break; 2273 default: 2274 pr_emerg("Unknown result in %s():%d\n", __func__, result); 2275 /* 2276 * Emulating the instruction which caused the #VC exception 2277 * failed - can't continue so print debug information 2278 */ 2279 BUG(); 2280 } 2281 2282 return ret; 2283 } 2284 2285 static __always_inline bool vc_is_db(unsigned long error_code) 2286 { 2287 return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB; 2288 } 2289 2290 /* 2291 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode 2292 * and will panic when an error happens. 2293 */ 2294 DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) 2295 { 2296 irqentry_state_t irq_state; 2297 2298 /* 2299 * With the current implementation it is always possible to switch to a 2300 * safe stack because #VC exceptions only happen at known places, like 2301 * intercepted instructions or accesses to MMIO areas/IO ports. They can 2302 * also happen with code instrumentation when the hypervisor intercepts 2303 * #DB, but the critical paths are forbidden to be instrumented, so #DB 2304 * exceptions currently also only happen in safe places. 2305 * 2306 * But keep this here in case the noinstr annotations are violated due 2307 * to bug elsewhere. 2308 */ 2309 if (unlikely(vc_from_invalid_context(regs))) { 2310 instrumentation_begin(); 2311 panic("Can't handle #VC exception from unsupported context\n"); 2312 instrumentation_end(); 2313 } 2314 2315 /* 2316 * Handle #DB before calling into !noinstr code to avoid recursive #DB. 2317 */ 2318 if (vc_is_db(error_code)) { 2319 exc_debug(regs); 2320 return; 2321 } 2322 2323 irq_state = irqentry_nmi_enter(regs); 2324 2325 instrumentation_begin(); 2326 2327 if (!vc_raw_handle_exception(regs, error_code)) { 2328 /* Show some debug info */ 2329 show_regs(regs); 2330 2331 /* Ask hypervisor to sev_es_terminate */ 2332 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 2333 2334 /* If that fails and we get here - just panic */ 2335 panic("Returned from Terminate-Request to Hypervisor\n"); 2336 } 2337 2338 instrumentation_end(); 2339 irqentry_nmi_exit(regs, irq_state); 2340 } 2341 2342 /* 2343 * Runtime #VC exception handler when raised from user mode. 
Runs in IRQ mode 2344 * and will kill the current task with SIGBUS when an error happens. 2345 */ 2346 DEFINE_IDTENTRY_VC_USER(exc_vmm_communication) 2347 { 2348 /* 2349 * Handle #DB before calling into !noinstr code to avoid recursive #DB. 2350 */ 2351 if (vc_is_db(error_code)) { 2352 noist_exc_debug(regs); 2353 return; 2354 } 2355 2356 irqentry_enter_from_user_mode(regs); 2357 instrumentation_begin(); 2358 2359 if (!vc_raw_handle_exception(regs, error_code)) { 2360 /* 2361 * Do not kill the machine if user-space triggered the 2362 * exception. Send SIGBUS instead and let user-space deal with 2363 * it. 2364 */ 2365 force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); 2366 } 2367 2368 instrumentation_end(); 2369 irqentry_exit_to_user_mode(regs); 2370 } 2371 2372 bool __init handle_vc_boot_ghcb(struct pt_regs *regs) 2373 { 2374 unsigned long exit_code = regs->orig_ax; 2375 struct es_em_ctxt ctxt; 2376 enum es_result result; 2377 2378 vc_ghcb_invalidate(boot_ghcb); 2379 2380 result = vc_init_em_ctxt(&ctxt, regs, exit_code); 2381 if (result == ES_OK) 2382 result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code); 2383 2384 /* Done - now check the result */ 2385 switch (result) { 2386 case ES_OK: 2387 vc_finish_insn(&ctxt); 2388 break; 2389 case ES_UNSUPPORTED: 2390 early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", 2391 exit_code, regs->ip); 2392 goto fail; 2393 case ES_VMM_ERROR: 2394 early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", 2395 exit_code, regs->ip); 2396 goto fail; 2397 case ES_DECODE_FAILED: 2398 early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", 2399 exit_code, regs->ip); 2400 goto fail; 2401 case ES_EXCEPTION: 2402 vc_early_forward_exception(&ctxt); 2403 break; 2404 case ES_RETRY: 2405 /* Nothing to do */ 2406 break; 2407 default: 2408 BUG(); 2409 } 2410 2411 return true; 2412 2413 fail: 2414 show_regs(regs); 2415 2416 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 2417 } 2418 2419 /* 2420 * Initial set up of SNP relies on information provided by the 2421 * Confidential Computing blob, which can be passed to the kernel 2422 * in the following ways, depending on how it is booted: 2423 * 2424 * - when booted via the boot/decompress kernel: 2425 * - via boot_params 2426 * 2427 * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH): 2428 * - via a setup_data entry, as defined by the Linux Boot Protocol 2429 * 2430 * Scan for the blob in that order. 2431 */ 2432 static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) 2433 { 2434 struct cc_blob_sev_info *cc_info; 2435 2436 /* Boot kernel would have passed the CC blob via boot_params. */ 2437 if (bp->cc_blob_address) { 2438 cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address; 2439 goto found_cc_info; 2440 } 2441 2442 /* 2443 * If kernel was booted directly, without the use of the 2444 * boot/decompression kernel, the CC blob may have been passed via 2445 * setup_data instead. 2446 */ 2447 cc_info = find_cc_blob_setup_data(bp); 2448 if (!cc_info) 2449 return NULL; 2450 2451 found_cc_info: 2452 if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) 2453 snp_abort(); 2454 2455 return cc_info; 2456 } 2457 2458 static __head void svsm_setup(struct cc_blob_sev_info *cc_info) 2459 { 2460 struct svsm_call call = {}; 2461 int ret; 2462 u64 pa; 2463 2464 /* 2465 * Record the SVSM Calling Area address (CAA) if the guest is not 2466 * running at VMPL0. 
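 * When the guest itself runs at VMPL0 there is no SVSM to talk to and no
 * CAA needs to be recorded.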
The CA will be used to communicate with the 2467 * SVSM to perform the SVSM services. 2468 */ 2469 if (!svsm_setup_ca(cc_info)) 2470 return; 2471 2472 /* 2473 * It is very early in the boot and the kernel is running identity 2474 * mapped but without having adjusted the pagetables to where the 2475 * kernel was loaded (physbase), so the get the CA address using 2476 * RIP-relative addressing. 2477 */ 2478 pa = (u64)&RIP_REL_REF(boot_svsm_ca_page); 2479 2480 /* 2481 * Switch over to the boot SVSM CA while the current CA is still 2482 * addressable. There is no GHCB at this point so use the MSR protocol. 2483 * 2484 * SVSM_CORE_REMAP_CA call: 2485 * RAX = 0 (Protocol=0, CallID=0) 2486 * RCX = New CA GPA 2487 */ 2488 call.caa = svsm_get_caa(); 2489 call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); 2490 call.rcx = pa; 2491 ret = svsm_perform_call_protocol(&call); 2492 if (ret) 2493 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL); 2494 2495 RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa; 2496 RIP_REL_REF(boot_svsm_caa_pa) = pa; 2497 } 2498 2499 bool __head snp_init(struct boot_params *bp) 2500 { 2501 struct cc_blob_sev_info *cc_info; 2502 2503 if (!bp) 2504 return false; 2505 2506 cc_info = find_cc_blob(bp); 2507 if (!cc_info) 2508 return false; 2509 2510 if (cc_info->secrets_phys && cc_info->secrets_len == PAGE_SIZE) 2511 secrets_pa = cc_info->secrets_phys; 2512 else 2513 return false; 2514 2515 setup_cpuid_table(cc_info); 2516 2517 svsm_setup(cc_info); 2518 2519 /* 2520 * The CC blob will be used later to access the secrets page. Cache 2521 * it here like the boot kernel does. 2522 */ 2523 bp->cc_blob_address = (u32)(unsigned long)cc_info; 2524 2525 return true; 2526 } 2527 2528 void __head __noreturn snp_abort(void) 2529 { 2530 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); 2531 } 2532 2533 /* 2534 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are 2535 * enabled, as the alternative (fallback) logic for DMI probing in the legacy 2536 * ROM region can cause a crash since this region is not pre-validated. 2537 */ 2538 void __init snp_dmi_setup(void) 2539 { 2540 if (efi_enabled(EFI_CONFIG_TABLES)) 2541 dmi_setup(); 2542 } 2543 2544 static void dump_cpuid_table(void) 2545 { 2546 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); 2547 int i = 0; 2548 2549 pr_info("count=%d reserved=0x%x reserved2=0x%llx\n", 2550 cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2); 2551 2552 for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) { 2553 const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; 2554 2555 pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n", 2556 i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx, 2557 fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved); 2558 } 2559 } 2560 2561 /* 2562 * It is useful from an auditing/testing perspective to provide an easy way 2563 * for the guest owner to know that the CPUID table has been initialized as 2564 * expected, but that initialization happens too early in boot to print any 2565 * sort of indicator, and there's not really any other good place to do it, 2566 * so do it here. 2567 * 2568 * If running as an SNP guest, report the current VM privilege level (VMPL). 
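 * VMPL0 is the most privileged level; a non-zero value indicates that the
 * guest is running under an SVSM.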
2569 */ 2570 static int __init report_snp_info(void) 2571 { 2572 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); 2573 2574 if (cpuid_table->count) { 2575 pr_info("Using SNP CPUID table, %d entries present.\n", 2576 cpuid_table->count); 2577 2578 if (sev_cfg.debug) 2579 dump_cpuid_table(); 2580 } 2581 2582 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 2583 pr_info("SNP running at VMPL%u.\n", snp_vmpl); 2584 2585 return 0; 2586 } 2587 arch_initcall(report_snp_info); 2588 2589 static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input) 2590 { 2591 /* If (new) lengths have been returned, propagate them up */ 2592 if (call->rcx_out != call->rcx) 2593 input->manifest_buf.len = call->rcx_out; 2594 2595 if (call->rdx_out != call->rdx) 2596 input->certificates_buf.len = call->rdx_out; 2597 2598 if (call->r8_out != call->r8) 2599 input->report_buf.len = call->r8_out; 2600 } 2601 2602 int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, 2603 struct svsm_attest_call *input) 2604 { 2605 struct svsm_attest_call *ac; 2606 unsigned long flags; 2607 u64 attest_call_pa; 2608 int ret; 2609 2610 if (!snp_vmpl) 2611 return -EINVAL; 2612 2613 local_irq_save(flags); 2614 2615 call->caa = svsm_get_caa(); 2616 2617 ac = (struct svsm_attest_call *)call->caa->svsm_buffer; 2618 attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); 2619 2620 *ac = *input; 2621 2622 /* 2623 * Set input registers for the request and set RDX and R8 to known 2624 * values in order to detect length values being returned in them. 2625 */ 2626 call->rax = call_id; 2627 call->rcx = attest_call_pa; 2628 call->rdx = -1; 2629 call->r8 = -1; 2630 ret = svsm_perform_call_protocol(call); 2631 update_attest_input(call, input); 2632 2633 local_irq_restore(flags); 2634 2635 return ret; 2636 } 2637 EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req); 2638 2639 static int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input, 2640 struct snp_guest_request_ioctl *rio) 2641 { 2642 struct ghcb_state state; 2643 struct es_em_ctxt ctxt; 2644 unsigned long flags; 2645 struct ghcb *ghcb; 2646 int ret; 2647 2648 rio->exitinfo2 = SEV_RET_NO_FW_CALL; 2649 2650 /* 2651 * __sev_get_ghcb() needs to run with IRQs disabled because it is using 2652 * a per-CPU GHCB. 
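 * Interrupts are re-enabled only after the response has been read back from
 * the GHCB and __sev_put_ghcb() has released it.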
2653 */ 2654 local_irq_save(flags); 2655 2656 ghcb = __sev_get_ghcb(&state); 2657 if (!ghcb) { 2658 ret = -EIO; 2659 goto e_restore_irq; 2660 } 2661 2662 vc_ghcb_invalidate(ghcb); 2663 2664 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { 2665 ghcb_set_rax(ghcb, input->data_gpa); 2666 ghcb_set_rbx(ghcb, input->data_npages); 2667 } 2668 2669 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa); 2670 if (ret) 2671 goto e_put; 2672 2673 rio->exitinfo2 = ghcb->save.sw_exit_info_2; 2674 switch (rio->exitinfo2) { 2675 case 0: 2676 break; 2677 2678 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY): 2679 ret = -EAGAIN; 2680 break; 2681 2682 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN): 2683 /* Number of expected pages are returned in RBX */ 2684 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { 2685 input->data_npages = ghcb_get_rbx(ghcb); 2686 ret = -ENOSPC; 2687 break; 2688 } 2689 fallthrough; 2690 default: 2691 ret = -EIO; 2692 break; 2693 } 2694 2695 e_put: 2696 __sev_put_ghcb(&state); 2697 e_restore_irq: 2698 local_irq_restore(flags); 2699 2700 return ret; 2701 } 2702 2703 static struct platform_device sev_guest_device = { 2704 .name = "sev-guest", 2705 .id = -1, 2706 }; 2707 2708 static int __init snp_init_platform_device(void) 2709 { 2710 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 2711 return -ENODEV; 2712 2713 if (platform_device_register(&sev_guest_device)) 2714 return -ENODEV; 2715 2716 pr_info("SNP guest platform device initialized.\n"); 2717 return 0; 2718 } 2719 device_initcall(snp_init_platform_device); 2720 2721 void sev_show_status(void) 2722 { 2723 int i; 2724 2725 pr_info("Status: "); 2726 for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) { 2727 if (sev_status & BIT_ULL(i)) { 2728 if (!sev_status_feat_names[i]) 2729 continue; 2730 2731 pr_cont("%s ", sev_status_feat_names[i]); 2732 } 2733 } 2734 pr_cont("\n"); 2735 } 2736 2737 void __init snp_update_svsm_ca(void) 2738 { 2739 if (!snp_vmpl) 2740 return; 2741 2742 /* Update the CAA to a proper kernel address */ 2743 boot_svsm_caa = &boot_svsm_ca_page; 2744 } 2745 2746 #ifdef CONFIG_SYSFS 2747 static ssize_t vmpl_show(struct kobject *kobj, 2748 struct kobj_attribute *attr, char *buf) 2749 { 2750 return sysfs_emit(buf, "%d\n", snp_vmpl); 2751 } 2752 2753 static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl); 2754 2755 static struct attribute *vmpl_attrs[] = { 2756 &vmpl_attr.attr, 2757 NULL 2758 }; 2759 2760 static struct attribute_group sev_attr_group = { 2761 .attrs = vmpl_attrs, 2762 }; 2763 2764 static int __init sev_sysfs_init(void) 2765 { 2766 struct kobject *sev_kobj; 2767 struct device *dev_root; 2768 int ret; 2769 2770 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 2771 return -ENODEV; 2772 2773 dev_root = bus_get_dev_root(&cpu_subsys); 2774 if (!dev_root) 2775 return -ENODEV; 2776 2777 sev_kobj = kobject_create_and_add("sev", &dev_root->kobj); 2778 put_device(dev_root); 2779 2780 if (!sev_kobj) 2781 return -ENOMEM; 2782 2783 ret = sysfs_create_group(sev_kobj, &sev_attr_group); 2784 if (ret) 2785 kobject_put(sev_kobj); 2786 2787 return ret; 2788 } 2789 arch_initcall(sev_sysfs_init); 2790 #endif // CONFIG_SYSFS 2791 2792 static void free_shared_pages(void *buf, size_t sz) 2793 { 2794 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; 2795 int ret; 2796 2797 if (!buf) 2798 return; 2799 2800 ret = set_memory_encrypted((unsigned long)buf, npages); 2801 if (ret) { 2802 WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); 2803 return; 2804 } 2805 2806 
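	/* The pages are private (encrypted) again, return them to the page allocator. */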
__free_pages(virt_to_page(buf), get_order(sz)); 2807 } 2808 2809 static void *alloc_shared_pages(size_t sz) 2810 { 2811 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; 2812 struct page *page; 2813 int ret; 2814 2815 page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz)); 2816 if (!page) 2817 return NULL; 2818 2819 ret = set_memory_decrypted((unsigned long)page_address(page), npages); 2820 if (ret) { 2821 pr_err("failed to mark page shared, ret=%d\n", ret); 2822 __free_pages(page, get_order(sz)); 2823 return NULL; 2824 } 2825 2826 return page_address(page); 2827 } 2828 2829 static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno) 2830 { 2831 u8 *key = NULL; 2832 2833 switch (id) { 2834 case 0: 2835 *seqno = &secrets->os_area.msg_seqno_0; 2836 key = secrets->vmpck0; 2837 break; 2838 case 1: 2839 *seqno = &secrets->os_area.msg_seqno_1; 2840 key = secrets->vmpck1; 2841 break; 2842 case 2: 2843 *seqno = &secrets->os_area.msg_seqno_2; 2844 key = secrets->vmpck2; 2845 break; 2846 case 3: 2847 *seqno = &secrets->os_area.msg_seqno_3; 2848 key = secrets->vmpck3; 2849 break; 2850 default: 2851 break; 2852 } 2853 2854 return key; 2855 } 2856 2857 static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen) 2858 { 2859 struct aesgcm_ctx *ctx; 2860 2861 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 2862 if (!ctx) 2863 return NULL; 2864 2865 if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) { 2866 pr_err("Crypto context initialization failed\n"); 2867 kfree(ctx); 2868 return NULL; 2869 } 2870 2871 return ctx; 2872 } 2873 2874 int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) 2875 { 2876 /* Adjust the default VMPCK key based on the executing VMPL level */ 2877 if (vmpck_id == -1) 2878 vmpck_id = snp_vmpl; 2879 2880 mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno); 2881 if (!mdesc->vmpck) { 2882 pr_err("Invalid VMPCK%d communication key\n", vmpck_id); 2883 return -EINVAL; 2884 } 2885 2886 /* Verify that VMPCK is not zero. */ 2887 if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) { 2888 pr_err("Empty VMPCK%d communication key\n", vmpck_id); 2889 return -EINVAL; 2890 } 2891 2892 mdesc->vmpck_id = vmpck_id; 2893 2894 mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN); 2895 if (!mdesc->ctx) 2896 return -ENOMEM; 2897 2898 return 0; 2899 } 2900 EXPORT_SYMBOL_GPL(snp_msg_init); 2901 2902 struct snp_msg_desc *snp_msg_alloc(void) 2903 { 2904 struct snp_msg_desc *mdesc; 2905 void __iomem *mem; 2906 2907 BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE); 2908 2909 mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL); 2910 if (!mdesc) 2911 return ERR_PTR(-ENOMEM); 2912 2913 mem = ioremap_encrypted(secrets_pa, PAGE_SIZE); 2914 if (!mem) 2915 goto e_free_mdesc; 2916 2917 mdesc->secrets = (__force struct snp_secrets_page *)mem; 2918 2919 /* Allocate the shared page used for the request and response message. 
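 * Both the request and the response page are mapped shared (decrypted) so
 * that the hypervisor can read the request and write back the response.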
*/ 2920 mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg)); 2921 if (!mdesc->request) 2922 goto e_unmap; 2923 2924 mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg)); 2925 if (!mdesc->response) 2926 goto e_free_request; 2927 2928 return mdesc; 2929 2930 e_free_request: 2931 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); 2932 e_unmap: 2933 iounmap(mem); 2934 e_free_mdesc: 2935 kfree(mdesc); 2936 2937 return ERR_PTR(-ENOMEM); 2938 } 2939 EXPORT_SYMBOL_GPL(snp_msg_alloc); 2940 2941 void snp_msg_free(struct snp_msg_desc *mdesc) 2942 { 2943 if (!mdesc) 2944 return; 2945 2946 kfree(mdesc->ctx); 2947 free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); 2948 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); 2949 iounmap((__force void __iomem *)mdesc->secrets); 2950 2951 memset(mdesc, 0, sizeof(*mdesc)); 2952 kfree(mdesc); 2953 } 2954 EXPORT_SYMBOL_GPL(snp_msg_free); 2955 2956 /* Mutex to serialize the shared buffer access and command handling. */ 2957 static DEFINE_MUTEX(snp_cmd_mutex); 2958 2959 /* 2960 * If an error is received from the host or AMD Secure Processor (ASP) there 2961 * are two options. Either retry the exact same encrypted request or discontinue 2962 * using the VMPCK. 2963 * 2964 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to 2965 * encrypt the requests. The IV for this scheme is the sequence number. GCM 2966 * cannot tolerate IV reuse. 2967 * 2968 * The ASP FW v1.51 only increments the sequence numbers on a successful 2969 * guest<->ASP back and forth and only accepts messages at its exact sequence 2970 * number. 2971 * 2972 * So if the sequence number were to be reused the encryption scheme is 2973 * vulnerable. If the sequence number were incremented for a fresh IV the ASP 2974 * will reject the request. 2975 */ 2976 static void snp_disable_vmpck(struct snp_msg_desc *mdesc) 2977 { 2978 pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n", 2979 mdesc->vmpck_id); 2980 memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN); 2981 mdesc->vmpck = NULL; 2982 } 2983 2984 static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc) 2985 { 2986 u64 count; 2987 2988 lockdep_assert_held(&snp_cmd_mutex); 2989 2990 /* Read the current message sequence counter from secrets pages */ 2991 count = *mdesc->os_area_msg_seqno; 2992 2993 return count + 1; 2994 } 2995 2996 /* Return a non-zero on success */ 2997 static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc) 2998 { 2999 u64 count = __snp_get_msg_seqno(mdesc); 3000 3001 /* 3002 * The message sequence counter for the SNP guest request is a 64-bit 3003 * value but the version 2 of GHCB specification defines a 32-bit storage 3004 * for it. If the counter exceeds the 32-bit value then return zero. 3005 * The caller should check the return value, but if the caller happens to 3006 * not check the value and use it, then the firmware treats zero as an 3007 * invalid number and will fail the message request. 3008 */ 3009 if (count >= UINT_MAX) { 3010 pr_err("request message sequence counter overflow\n"); 3011 return 0; 3012 } 3013 3014 return count; 3015 } 3016 3017 static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc) 3018 { 3019 /* 3020 * The counter is also incremented by the PSP, so increment it by 2 3021 * and save in secrets page. 
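 * Guest requests use the odd sequence numbers and the PSP responses use the
 * even ones, so advancing the counter by two keeps it in sync after every
 * completed exchange.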
3022 */ 3023 *mdesc->os_area_msg_seqno += 2; 3024 } 3025 3026 static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req) 3027 { 3028 struct snp_guest_msg *resp_msg = &mdesc->secret_response; 3029 struct snp_guest_msg *req_msg = &mdesc->secret_request; 3030 struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr; 3031 struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr; 3032 struct aesgcm_ctx *ctx = mdesc->ctx; 3033 u8 iv[GCM_AES_IV_SIZE] = {}; 3034 3035 pr_debug("response [seqno %lld type %d version %d sz %d]\n", 3036 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version, 3037 resp_msg_hdr->msg_sz); 3038 3039 /* Copy response from shared memory to encrypted memory. */ 3040 memcpy(resp_msg, mdesc->response, sizeof(*resp_msg)); 3041 3042 /* Verify that the sequence counter is incremented by 1 */ 3043 if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1))) 3044 return -EBADMSG; 3045 3046 /* Verify response message type and version number. */ 3047 if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) || 3048 resp_msg_hdr->msg_version != req_msg_hdr->msg_version) 3049 return -EBADMSG; 3050 3051 /* 3052 * If the message size is greater than our buffer length then return 3053 * an error. 3054 */ 3055 if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz)) 3056 return -EBADMSG; 3057 3058 /* Decrypt the payload */ 3059 memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno))); 3060 if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz, 3061 &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag)) 3062 return -EBADMSG; 3063 3064 return 0; 3065 } 3066 3067 static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req) 3068 { 3069 struct snp_guest_msg *msg = &mdesc->secret_request; 3070 struct snp_guest_msg_hdr *hdr = &msg->hdr; 3071 struct aesgcm_ctx *ctx = mdesc->ctx; 3072 u8 iv[GCM_AES_IV_SIZE] = {}; 3073 3074 memset(msg, 0, sizeof(*msg)); 3075 3076 hdr->algo = SNP_AEAD_AES_256_GCM; 3077 hdr->hdr_version = MSG_HDR_VER; 3078 hdr->hdr_sz = sizeof(*hdr); 3079 hdr->msg_type = req->msg_type; 3080 hdr->msg_version = req->msg_version; 3081 hdr->msg_seqno = seqno; 3082 hdr->msg_vmpck = req->vmpck_id; 3083 hdr->msg_sz = req->req_sz; 3084 3085 /* Verify the sequence number is non-zero */ 3086 if (!hdr->msg_seqno) 3087 return -ENOSR; 3088 3089 pr_debug("request [seqno %lld type %d version %d sz %d]\n", 3090 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz); 3091 3092 if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload))) 3093 return -EBADMSG; 3094 3095 memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno))); 3096 aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo, 3097 AAD_LEN, iv, hdr->authtag); 3098 3099 return 0; 3100 } 3101 3102 static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, 3103 struct snp_guest_request_ioctl *rio) 3104 { 3105 unsigned long req_start = jiffies; 3106 unsigned int override_npages = 0; 3107 u64 override_err = 0; 3108 int rc; 3109 3110 retry_request: 3111 /* 3112 * Call firmware to process the request. In this function the encrypted 3113 * message enters shared memory with the host. So after this call the 3114 * sequence number must be incremented or the VMPCK must be deleted to 3115 * prevent reuse of the IV. 
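 * The sequence number is incremented right after the call below regardless
 * of the outcome; the caller disables the VMPCK on any unexpected failure.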
3116 */ 3117 rc = snp_issue_guest_request(req, &req->input, rio); 3118 switch (rc) { 3119 case -ENOSPC: 3120 /* 3121 * If the extended guest request fails due to having too 3122 * small of a certificate data buffer, retry the same 3123 * guest request without the extended data request in 3124 * order to increment the sequence number and thus avoid 3125 * IV reuse. 3126 */ 3127 override_npages = req->input.data_npages; 3128 req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; 3129 3130 /* 3131 * Override the error to inform callers the given extended 3132 * request buffer size was too small and give the caller the 3133 * required buffer size. 3134 */ 3135 override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN); 3136 3137 /* 3138 * If this call to the firmware succeeds, the sequence number can 3139 * be incremented allowing for continued use of the VMPCK. If 3140 * there is an error reflected in the return value, this value 3141 * is checked further down and the result will be the deletion 3142 * of the VMPCK and the error code being propagated back to the 3143 * user as an ioctl() return code. 3144 */ 3145 goto retry_request; 3146 3147 /* 3148 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been 3149 * throttled. Retry in the driver to avoid returning and reusing the 3150 * message sequence number on a different message. 3151 */ 3152 case -EAGAIN: 3153 if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) { 3154 rc = -ETIMEDOUT; 3155 break; 3156 } 3157 schedule_timeout_killable(SNP_REQ_RETRY_DELAY); 3158 goto retry_request; 3159 } 3160 3161 /* 3162 * Increment the message sequence number. There is no harm in doing 3163 * this now because decryption uses the value stored in the response 3164 * structure and any failure will wipe the VMPCK, preventing further 3165 * use anyway. 3166 */ 3167 snp_inc_msg_seqno(mdesc); 3168 3169 if (override_err) { 3170 rio->exitinfo2 = override_err; 3171 3172 /* 3173 * If an extended guest request was issued and the supplied certificate 3174 * buffer was not large enough, a standard guest request was issued to 3175 * prevent IV reuse. If the standard request was successful, return -EIO 3176 * back to the caller as would have originally been returned. 3177 */ 3178 if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) 3179 rc = -EIO; 3180 } 3181 3182 if (override_npages) 3183 req->input.data_npages = override_npages; 3184 3185 return rc; 3186 } 3187 3188 int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, 3189 struct snp_guest_request_ioctl *rio) 3190 { 3191 u64 seqno; 3192 int rc; 3193 3194 guard(mutex)(&snp_cmd_mutex); 3195 3196 /* Check if the VMPCK is not empty */ 3197 if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) { 3198 pr_err_ratelimited("VMPCK is disabled\n"); 3199 return -ENOTTY; 3200 } 3201 3202 /* Get message sequence and verify that its a non-zero */ 3203 seqno = snp_get_msg_seqno(mdesc); 3204 if (!seqno) 3205 return -EIO; 3206 3207 /* Clear shared memory's response for the host to populate. */ 3208 memset(mdesc->response, 0, sizeof(struct snp_guest_msg)); 3209 3210 /* Encrypt the userspace provided payload in mdesc->secret_request. */ 3211 rc = enc_payload(mdesc, seqno, req); 3212 if (rc) 3213 return rc; 3214 3215 /* 3216 * Write the fully encrypted request to the shared unencrypted 3217 * request page. 
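 * The encryption above took place in mdesc->secret_request, which lives in
 * guest-private memory; only the resulting ciphertext is copied out.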
3218 */ 3219 memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request)); 3220 3221 /* Initialize the input address for guest request */ 3222 req->input.req_gpa = __pa(mdesc->request); 3223 req->input.resp_gpa = __pa(mdesc->response); 3224 req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0; 3225 3226 rc = __handle_guest_request(mdesc, req, rio); 3227 if (rc) { 3228 if (rc == -EIO && 3229 rio->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) 3230 return rc; 3231 3232 pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n", 3233 rc, rio->exitinfo2); 3234 3235 snp_disable_vmpck(mdesc); 3236 return rc; 3237 } 3238 3239 rc = verify_and_dec_payload(mdesc, req); 3240 if (rc) { 3241 pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc); 3242 snp_disable_vmpck(mdesc); 3243 return rc; 3244 } 3245 3246 return 0; 3247 } 3248 EXPORT_SYMBOL_GPL(snp_send_guest_request); 3249 3250 static int __init snp_get_tsc_info(void) 3251 { 3252 struct snp_guest_request_ioctl *rio; 3253 struct snp_tsc_info_resp *tsc_resp; 3254 struct snp_tsc_info_req *tsc_req; 3255 struct snp_msg_desc *mdesc; 3256 struct snp_guest_req *req; 3257 int rc = -ENOMEM; 3258 3259 tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL); 3260 if (!tsc_req) 3261 return rc; 3262 3263 /* 3264 * The intermediate response buffer is used while decrypting the 3265 * response payload. Make sure that it has enough space to cover 3266 * the authtag. 3267 */ 3268 tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL); 3269 if (!tsc_resp) 3270 goto e_free_tsc_req; 3271 3272 req = kzalloc(sizeof(*req), GFP_KERNEL); 3273 if (!req) 3274 goto e_free_tsc_resp; 3275 3276 rio = kzalloc(sizeof(*rio), GFP_KERNEL); 3277 if (!rio) 3278 goto e_free_req; 3279 3280 mdesc = snp_msg_alloc(); 3281 if (IS_ERR_OR_NULL(mdesc)) 3282 goto e_free_rio; 3283 3284 rc = snp_msg_init(mdesc, snp_vmpl); 3285 if (rc) 3286 goto e_free_mdesc; 3287 3288 req->msg_version = MSG_HDR_VER; 3289 req->msg_type = SNP_MSG_TSC_INFO_REQ; 3290 req->vmpck_id = snp_vmpl; 3291 req->req_buf = tsc_req; 3292 req->req_sz = sizeof(*tsc_req); 3293 req->resp_buf = (void *)tsc_resp; 3294 req->resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN; 3295 req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; 3296 3297 rc = snp_send_guest_request(mdesc, req, rio); 3298 if (rc) 3299 goto e_request; 3300 3301 pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n", 3302 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset, 3303 tsc_resp->tsc_factor); 3304 3305 if (!tsc_resp->status) { 3306 snp_tsc_scale = tsc_resp->tsc_scale; 3307 snp_tsc_offset = tsc_resp->tsc_offset; 3308 } else { 3309 pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status); 3310 rc = -EIO; 3311 } 3312 3313 e_request: 3314 /* The response buffer contains sensitive data, explicitly clear it. 
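 * memzero_explicit() is used so that the compiler cannot optimize the
 * clearing away.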
*/ 3315 memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN); 3316 e_free_mdesc: 3317 snp_msg_free(mdesc); 3318 e_free_rio: 3319 kfree(rio); 3320 e_free_req: 3321 kfree(req); 3322 e_free_tsc_resp: 3323 kfree(tsc_resp); 3324 e_free_tsc_req: 3325 kfree(tsc_req); 3326 3327 return rc; 3328 } 3329 3330 void __init snp_secure_tsc_prepare(void) 3331 { 3332 if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) 3333 return; 3334 3335 if (snp_get_tsc_info()) { 3336 pr_alert("Unable to retrieve Secure TSC info from ASP\n"); 3337 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); 3338 } 3339 3340 pr_debug("SecureTSC enabled"); 3341 } 3342 3343 static unsigned long securetsc_get_tsc_khz(void) 3344 { 3345 return snp_tsc_freq_khz; 3346 } 3347 3348 void __init snp_secure_tsc_init(void) 3349 { 3350 unsigned long long tsc_freq_mhz; 3351 3352 if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) 3353 return; 3354 3355 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); 3356 rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); 3357 snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); 3358 3359 x86_platform.calibrate_cpu = securetsc_get_tsc_khz; 3360 x86_platform.calibrate_tsc = securetsc_get_tsc_khz; 3361 } 3362