// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2019 SUSE
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"SEV: " fmt

#include <linux/sched/debug.h>	/* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/cc_platform.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/cpumask.h>
#include <linux/efi.h>
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/psp-sev.h>
#include <linux/dmi.h>
#include <uapi/linux/sev-guest.h>
#include <crypto/gcm.h>

#include <asm/init.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
#include <asm/insn-eval.h>
#include <asm/fpu/xcr.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/setup.h>
#include <asm/traps.h>
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/cpuid.h>
#include <asm/cmdline.h>

#define DR7_RESET_VALUE        0x400

/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
#define AP_INIT_CS_LIMIT		0xffff
#define AP_INIT_DS_LIMIT		0xffff
#define AP_INIT_LDTR_LIMIT		0xffff
#define AP_INIT_GDTR_LIMIT		0xffff
#define AP_INIT_IDTR_LIMIT		0xffff
#define AP_INIT_TR_LIMIT		0xffff
#define AP_INIT_RFLAGS_DEFAULT		0x2
#define AP_INIT_DR6_DEFAULT		0xffff0ff0
#define AP_INIT_GPAT_DEFAULT		0x0007040600070406ULL
#define AP_INIT_XCR0_DEFAULT		0x1
#define AP_INIT_X87_FTW_DEFAULT		0x5555
#define AP_INIT_X87_FCW_DEFAULT		0x0040
#define AP_INIT_CR0_DEFAULT		0x60000010
#define AP_INIT_MXCSR_DEFAULT		0x1f80

static const char * const sev_status_feat_names[] = {
	[MSR_AMD64_SEV_ENABLED_BIT]		= "SEV",
	[MSR_AMD64_SEV_ES_ENABLED_BIT]		= "SEV-ES",
	[MSR_AMD64_SEV_SNP_ENABLED_BIT]		= "SEV-SNP",
	[MSR_AMD64_SNP_VTOM_BIT]		= "vTom",
	[MSR_AMD64_SNP_REFLECT_VC_BIT]		= "ReflectVC",
	[MSR_AMD64_SNP_RESTRICTED_INJ_BIT]	= "RI",
	[MSR_AMD64_SNP_ALT_INJ_BIT]		= "AI",
	[MSR_AMD64_SNP_DEBUG_SWAP_BIT]		= "DebugSwap",
	[MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT]	= "NoHostIBS",
	[MSR_AMD64_SNP_BTB_ISOLATION_BIT]	= "BTBIsol",
	[MSR_AMD64_SNP_VMPL_SSS_BIT]		= "VmplSSS",
	[MSR_AMD64_SNP_SECURE_TSC_BIT]		= "SecureTSC",
	[MSR_AMD64_SNP_VMGEXIT_PARAM_BIT]	= "VMGExitParam",
	[MSR_AMD64_SNP_IBS_VIRT_BIT]		= "IBSVirt",
	[MSR_AMD64_SNP_VMSA_REG_PROT_BIT]	= "VMSARegProt",
	[MSR_AMD64_SNP_SMT_PROT_BIT]		= "SMTProt",
};

/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);

/*
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared
 */
static struct ghcb *boot_ghcb __section(".data");

/* Bitmap of SEV features supported by the hypervisor */
static u64 sev_hv_features __ro_after_init;

/* Secrets page physical address from the CC blob */
static u64 secrets_pa __ro_after_init;

/*
 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
 * across the APs' VMSA fields (TSC_SCALE and TSC_OFFSET).
 */
static u64 snp_tsc_scale __ro_after_init;
static u64 snp_tsc_offset __ro_after_init;
static u64 snp_tsc_freq_khz __ro_after_init;

/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
	struct ghcb ghcb_page;

	/*
	 * Reserve one page per CPU as backup storage for the unencrypted GHCB.
	 * It is needed when an NMI happens while the #VC handler uses the real
	 * GHCB, and the NMI handler itself is causing another #VC exception. In
	 * that case the GHCB content of the first handler needs to be backed up
	 * and restored.
	 */
	struct ghcb backup_ghcb;

	/*
	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
	 * There is no need for it to be atomic, because nothing is written to
	 * the GHCB between the read and the write of ghcb_active. So it is safe
	 * to use it when a nested #VC exception happens before the write.
	 *
	 * This is necessary for example in the #VC->NMI->#VC case when the NMI
	 * happens while the first #VC handler uses the GHCB. When the NMI code
	 * raises a second #VC handler it might overwrite the contents of the
	 * GHCB written by the first handler. To avoid this the content of the
	 * GHCB is saved and restored when the GHCB is detected to be in use
	 * already.
	 */
	bool ghcb_active;
	bool backup_ghcb_active;

	/*
	 * Cached DR7 value - write it on DR7 writes and return it on reads.
	 * That value will never make it to the real hardware DR7 as debugging
	 * is currently unsupported in SEV-ES guests.
	 */
	unsigned long dr7;
};

struct ghcb_state {
	struct ghcb *ghcb;
};

/* For early boot SVSM communication */
static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);

static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
static DEFINE_PER_CPU(u64, svsm_caa_pa);

static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
	unsigned long sp = regs->sp;

	/* User-mode RSP is not trusted */
	if (user_mode(regs))
		return false;

	/* SYSCALL gap still has user-mode RSP */
	if (ip_within_syscall_gap(regs))
		return false;

	return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
}

/*
 * This function handles the case when an NMI is raised in the #VC
 * exception handler entry code, before the #VC handler has switched off
 * its IST stack. In this case, the IST entry for #VC must be adjusted,
 * so that any nested #VC exception will not overwrite the stack
 * contents of the interrupted #VC handler.
 *
 * The IST entry is adjusted unconditionally so that it can also be
 * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
 * nested sev_es_ist_exit() call may adjust back the IST entry too
 * early.
 *
 * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
 * on the NMI IST stack, as they are only called from NMI handling code
 * right now.
 */
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
	unsigned long old_ist, new_ist;

	/* Read old IST entry */
	new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	/*
	 * If NMI happened while on the #VC IST stack, set the new IST
	 * value below regs->sp, so that the interrupted stack frame is
	 * not overwritten by subsequent #VC exceptions.
	 */
	if (on_vc_stack(regs))
		new_ist = regs->sp;

	/*
	 * Reserve additional 8 bytes and store old IST value so this
	 * adjustment can be unrolled in __sev_es_ist_exit().
	 */
	new_ist -= sizeof(old_ist);
	*(unsigned long *)new_ist = old_ist;

	/* Set new IST entry */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
}

void noinstr __sev_es_ist_exit(void)
{
	unsigned long ist;

	/* Read IST entry */
	ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
		return;

	/* Read back old IST entry and write it to the TSS */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
}
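
/*
 * The effect of the enter/exit pair above, with made-up example numbers:
 * assume the #VC IST entry is 0xf000 and an NMI interrupts a #VC handler
 * whose RSP is currently 0xec40. Then:
 *
 *	__sev_es_ist_enter():	new_ist = 0xec40 - 8 = 0xec38
 *				*(unsigned long *)0xec38 = 0xf000 (old IST)
 *				TSS ist[IST_INDEX_VC] = 0xec38
 *	__sev_es_ist_exit():	TSS ist[IST_INDEX_VC] = *(unsigned long *)0xec38
 *				                      = 0xf000
 *
 * A #VC raised from within the NMI handler therefore starts below the
 * interrupted #VC stack frame instead of overwriting it.
 */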

/*
 * Nothing shall interrupt this code path while holding the per-CPU
 * GHCB. The backup GHCB is only for NMIs interrupting this path.
 *
 * Callers must disable local interrupts around it.
 */
static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (unlikely(data->ghcb_active)) {
		/* GHCB is already in use - save its contents */

		if (unlikely(data->backup_ghcb_active)) {
			/*
			 * Backup-GHCB is also already in use. There is no way
			 * to continue here so just kill the machine. To make
			 * panic() work, mark GHCBs inactive so that messages
			 * can be printed out.
			 */
			data->ghcb_active = false;
			data->backup_ghcb_active = false;

			instrumentation_begin();
			panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
			instrumentation_end();
		}

		/* Mark backup_ghcb active before writing to it */
		data->backup_ghcb_active = true;

		state->ghcb = &data->backup_ghcb;

		/* Backup GHCB content */
		*state->ghcb = *ghcb;
	} else {
		state->ghcb = NULL;
		data->ghcb_active = true;
	}

	return ghcb;
}
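
/*
 * Typical usage of the helper above, sketched from its callers further down
 * in this file (e.g. __sev_es_nmi_complete() and get_jump_table_addr());
 * __sev_put_ghcb() is defined below, after the shared.c include:
 *
 *	struct ghcb_state state;
 *	unsigned long flags;
 *	struct ghcb *ghcb;
 *
 *	local_irq_save(flags);
 *	ghcb = __sev_get_ghcb(&state);
 *
 *	vc_ghcb_invalidate(ghcb);
 *	ghcb_set_sw_exit_code(ghcb, ...);
 *	sev_es_wr_ghcb_msr(__pa(ghcb));
 *	VMGEXIT();
 *
 *	__sev_put_ghcb(&state);
 *	local_irq_restore(flags);
 */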

static inline u64 sev_es_rd_ghcb_msr(void)
{
	return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}

static __always_inline void sev_es_wr_ghcb_msr(u64 val)
{
	u32 low, high;

	low = (u32)(val);
	high = (u32)(val >> 32);

	native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}

static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
				unsigned char *buffer)
{
	return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}

static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	int insn_bytes;

	insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
	if (insn_bytes == 0) {
		/* Nothing could be copied */
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
		ctxt->fi.cr2 = ctxt->regs->ip;
		return ES_EXCEPTION;
	} else if (insn_bytes == -EINVAL) {
		/* Effective RIP could not be calculated */
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		ctxt->fi.cr2 = 0;
		return ES_EXCEPTION;
	}

	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
		return ES_DECODE_FAILED;

	if (ctxt->insn.immediate.got)
		return ES_OK;
	else
		return ES_DECODE_FAILED;
}

static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	int res, ret;

	res = vc_fetch_insn_kernel(ctxt, buffer);
	if (res) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR;
		ctxt->fi.cr2 = ctxt->regs->ip;
		return ES_EXCEPTION;
	}

	ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
	if (ret < 0)
		return ES_DECODE_FAILED;
	else
		return ES_OK;
}

static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
{
	if (user_mode(ctxt->regs))
		return __vc_decode_user_insn(ctxt);
	else
		return __vc_decode_kern_insn(ctxt);
}

static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
				   char *dst, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;

	/*
	 * This function uses __put_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __put_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __put_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_to_user() here because
	 * vc_write_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *target = (u8 __user *)dst;

		memcpy(&d1, buf, 1);
		if (__put_user(d1, target))
			goto fault;
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *target = (u16 __user *)dst;

		memcpy(&d2, buf, 2);
		if (__put_user(d2, target))
			goto fault;
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *target = (u32 __user *)dst;

		memcpy(&d4, buf, 4);
		if (__put_user(d4, target))
			goto fault;
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *target = (u64 __user *)dst;

		memcpy(&d8, buf, 8);
		if (__put_user(d8, target))
			goto fault;
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)dst;

	return ES_EXCEPTION;
}
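
/*
 * Example of the split described in the comment above: a MOVSL with a RAM
 * source and an MMIO destination is emulated by vc_handle_mmio_movs() as a
 * 4-byte vc_read_mem() from the source followed by a 4-byte vc_write_mem()
 * to the destination. The write raises its own #VC with an MMIO exit code
 * and is handled as a plain 4-byte MMIO write, so no string instruction ever
 * touches the MMIO range from within this handler.
 */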

static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
				  char *src, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT;

	/*
	 * This function uses __get_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __get_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __get_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_from_user() here because
	 * vc_read_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *s = (u8 __user *)src;

		if (__get_user(d1, s))
			goto fault;
		memcpy(buf, &d1, 1);
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *s = (u16 __user *)src;

		if (__get_user(d2, s))
			goto fault;
		memcpy(buf, &d2, 2);
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *s = (u32 __user *)src;

		if (__get_user(d4, s))
			goto fault;
		memcpy(buf, &d4, 4);
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *s = (u64 __user *)src;

		if (__get_user(d8, s))
			goto fault;
		memcpy(buf, &d8, 8);
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)src;

	return ES_EXCEPTION;
}

static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
					   unsigned long vaddr, phys_addr_t *paddr)
{
	unsigned long va = (unsigned long)vaddr;
	unsigned int level;
	phys_addr_t pa;
	pgd_t *pgd;
	pte_t *pte;

	pgd = __va(read_cr3_pa());
	pgd = &pgd[pgd_index(va)];
	pte = lookup_address_in_pgd(pgd, va, &level);
	if (!pte) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.cr2 = vaddr;
		ctxt->fi.error_code = 0;

		if (user_mode(ctxt->regs))
			ctxt->fi.error_code |= X86_PF_USER;

		return ES_EXCEPTION;
	}

	if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
		/* Emulated MMIO to/from encrypted memory not supported */
		return ES_UNSUPPORTED;

	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
	pa |= va & ~page_level_mask(level);

	*paddr = pa;

	return ES_OK;
}

static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
{
	BUG_ON(size > 4);

	if (user_mode(ctxt->regs)) {
		struct thread_struct *t = &current->thread;
		struct io_bitmap *iobm = t->io_bitmap;
		size_t idx;

		if (!iobm)
			goto fault;

		for (idx = port; idx < port + size; ++idx) {
			if (test_bit(idx, iobm->bitmap))
				goto fault;
		}
	}

	return ES_OK;

fault:
	ctxt->fi.vector = X86_TRAP_GP;
	ctxt->fi.error_code = 0;

	return ES_EXCEPTION;
}

static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
{
	long error_code = ctxt->fi.error_code;
	int trapnr = ctxt->fi.vector;

	ctxt->regs->orig_ax = ctxt->fi.error_code;

	switch (trapnr) {
	case X86_TRAP_GP:
		exc_general_protection(ctxt->regs, error_code);
		break;
	case X86_TRAP_UD:
		exc_invalid_op(ctxt->regs);
		break;
	case X86_TRAP_PF:
		write_cr2(ctxt->fi.cr2);
		exc_page_fault(ctxt->regs, error_code);
		break;
	case X86_TRAP_AC:
		exc_alignment_check(ctxt->regs, error_code);
		break;
	default:
		pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
		BUG();
	}
}

/* Include code shared with pre-decompression boot stage */
#include "shared.c"

static inline struct svsm_ca *svsm_get_caa(void)
{
	/*
	 * Use rIP-relative references when called early in the boot. If
	 * ->use_cas is set, then it is late in the boot and no need
	 * to worry about rIP-relative references.
	 */
	if (RIP_REL_REF(sev_cfg).use_cas)
		return this_cpu_read(svsm_caa);
	else
		return RIP_REL_REF(boot_svsm_caa);
}

static u64 svsm_get_caa_pa(void)
{
	/*
	 * Use rIP-relative references when called early in the boot. If
	 * ->use_cas is set, then it is late in the boot and no need
	 * to worry about rIP-relative references.
	 */
	if (RIP_REL_REF(sev_cfg).use_cas)
		return this_cpu_read(svsm_caa_pa);
	else
		return RIP_REL_REF(boot_svsm_caa_pa);
}

static noinstr void __sev_put_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (state->ghcb) {
		/* Restore GHCB from Backup */
		*ghcb = *state->ghcb;
		data->backup_ghcb_active = false;
		state->ghcb = NULL;
	} else {
		/*
		 * Invalidate the GHCB so a VMGEXIT instruction issued
		 * from userspace won't appear to be valid.
		 */
		vc_ghcb_invalidate(ghcb);
		data->ghcb_active = false;
	}
}

static int svsm_perform_call_protocol(struct svsm_call *call)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret;

	/*
	 * This can be called very early in the boot, use native functions in
	 * order to avoid paravirt issues.
	 */
	flags = native_local_irq_save();

	/*
	 * Use rip-relative references when called early in the boot. If
	 * ghcbs_initialized is set, then it is late in the boot and no need
	 * to worry about rip-relative references in called functions.
	 */
	if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
		ghcb = __sev_get_ghcb(&state);
	else if (RIP_REL_REF(boot_ghcb))
		ghcb = RIP_REL_REF(boot_ghcb);
	else
		ghcb = NULL;

	do {
		ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
			   : svsm_perform_msr_protocol(call);
	} while (ret == -EAGAIN);

	if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
		__sev_put_ghcb(&state);

	native_local_irq_restore(flags);

	return ret;
}
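
/*
 * Sketch of how the callers below use this helper, based on
 * sev_es_init_vc_handling() (SVSM_CORE_REMAP_CA) and snp_set_vmsa()
 * (SVSM_CORE_CREATE_VCPU/DELETE_VCPU):
 *
 *	struct svsm_call call = {};
 *
 *	call.caa = svsm_get_caa();
 *	call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
 *	call.rcx = this_cpu_read(svsm_caa_pa);
 *	ret = svsm_perform_call_protocol(&call);
 *
 * The helper retries on -EAGAIN and transparently picks the GHCB or the MSR
 * based protocol depending on how far boot has progressed.
 */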

void noinstr __sev_es_nmi_complete(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
	VMGEXIT();

	__sev_put_ghcb(&state);
}

static u64 __init get_snp_jump_table_addr(void)
{
	struct snp_secrets_page *secrets;
	void __iomem *mem;
	u64 addr;

	mem = ioremap_encrypted(secrets_pa, PAGE_SIZE);
	if (!mem) {
		pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
		return 0;
	}

	secrets = (__force struct snp_secrets_page *)mem;

	addr = secrets->os_area.ap_jump_table_pa;
	iounmap(mem);

	return addr;
}

static u64 __init get_jump_table_addr(void)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	u64 ret = 0;

	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return get_snp_jump_table_addr();

	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
	    ghcb_sw_exit_info_2_is_valid(ghcb))
		ret = ghcb->save.sw_exit_info_2;

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}

static void __head
early_set_pages_state(unsigned long vaddr, unsigned long paddr,
		      unsigned long npages, enum psc_op op)
{
	unsigned long paddr_end;
	u64 val;

	vaddr = vaddr & PAGE_MASK;

	paddr = paddr & PAGE_MASK;
	paddr_end = paddr + (npages << PAGE_SHIFT);

	while (paddr < paddr_end) {
		/* Page validation must be rescinded before changing to shared */
		if (op == SNP_PAGE_STATE_SHARED)
			pvalidate_4k_page(vaddr, paddr, false);

		/*
		 * Use the MSR protocol because this function can be called before
		 * the GHCB is established.
		 */
		sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
		VMGEXIT();

		val = sev_es_rd_ghcb_msr();

		if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
			goto e_term;

		if (GHCB_MSR_PSC_RESP_VAL(val))
			goto e_term;

		/* Page validation must be performed after changing to private */
		if (op == SNP_PAGE_STATE_PRIVATE)
			pvalidate_4k_page(vaddr, paddr, true);

		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	return;

e_term:
	sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
}
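
/*
 * One iteration of the loop above, spelled out for a single GFN (the GFN is
 * made up; the bit packing is whatever GHCB_MSR_PSC_REQ_GFN() encodes for the
 * MSR-based Page State Change protocol of the GHCB specification):
 *
 *	sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(0x12345, SNP_PAGE_STATE_PRIVATE));
 *	VMGEXIT();
 *	val = sev_es_rd_ghcb_msr();
 *
 *	GHCB_RESP_CODE(val) == GHCB_MSR_PSC_RESP && !GHCB_MSR_PSC_RESP_VAL(val)
 *		-> pvalidate_4k_page(vaddr, paddr, true)
 *	anything else
 *		-> sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC)
 *
 * There is no way to continue safely with a page left in an unknown state,
 * hence the unconditional termination on failure.
 */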

void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
					 unsigned long npages)
{
	/*
	 * This can be invoked in early boot while running identity mapped, so
	 * use an open coded check for SNP instead of using cc_platform_has().
	 * This eliminates worries about jump tables or checking boot_cpu_data
	 * in the cc_platform_has() function.
	 */
	if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
		return;

	/*
	 * Ask the hypervisor to mark the memory pages as private in the RMP
	 * table.
	 */
	early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
}

void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
					unsigned long npages)
{
	/*
	 * This can be invoked in early boot while running identity mapped, so
	 * use an open coded check for SNP instead of using cc_platform_has().
	 * This eliminates worries about jump tables or checking boot_cpu_data
	 * in the cc_platform_has() function.
	 */
	if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
		return;

	/* Ask hypervisor to mark the memory pages shared in the RMP table. */
	early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
}

static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
				       unsigned long vaddr_end, int op)
{
	struct ghcb_state state;
	bool use_large_entry;
	struct psc_hdr *hdr;
	struct psc_entry *e;
	unsigned long flags;
	unsigned long pfn;
	struct ghcb *ghcb;
	int i;

	hdr = &data->hdr;
	e = data->entries;

	memset(data, 0, sizeof(*data));
	i = 0;

	while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
		hdr->end_entry = i;

		if (is_vmalloc_addr((void *)vaddr)) {
			pfn = vmalloc_to_pfn((void *)vaddr);
			use_large_entry = false;
		} else {
			pfn = __pa(vaddr) >> PAGE_SHIFT;
			use_large_entry = true;
		}

		e->gfn = pfn;
		e->operation = op;

		if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
		    (vaddr_end - vaddr) >= PMD_SIZE) {
			e->pagesize = RMP_PG_SIZE_2M;
			vaddr += PMD_SIZE;
		} else {
			e->pagesize = RMP_PG_SIZE_4K;
			vaddr += PAGE_SIZE;
		}

		e++;
		i++;
	}

	/* Page validation must be rescinded before changing to shared */
	if (op == SNP_PAGE_STATE_SHARED)
		pvalidate_pages(data);

	local_irq_save(flags);

	if (sev_cfg.ghcbs_initialized)
		ghcb = __sev_get_ghcb(&state);
	else
		ghcb = boot_ghcb;

	/* Invoke the hypervisor to perform the page state changes */
	if (!ghcb || vmgexit_psc(ghcb, data))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);

	if (sev_cfg.ghcbs_initialized)
		__sev_put_ghcb(&state);

	local_irq_restore(flags);

	/* Page validation must be performed after changing to private */
	if (op == SNP_PAGE_STATE_PRIVATE)
		pvalidate_pages(data);

	return vaddr;
}

static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
{
	struct snp_psc_desc desc;
	unsigned long vaddr_end;

	/* Use the MSR protocol when a GHCB is not available. */
	if (!boot_ghcb)
		return early_set_pages_state(vaddr, __pa(vaddr), npages, op);

	vaddr = vaddr & PAGE_MASK;
	vaddr_end = vaddr + (npages << PAGE_SHIFT);

	while (vaddr < vaddr_end)
		vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
}
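
/*
 * The ordering enforced above (in both early_set_pages_state() and
 * __set_pages_state()) matters: for private->shared conversions the
 * PVALIDATE rescind happens before the RMP entry is changed, for
 * shared->private it happens after, since PVALIDATE only applies to pages
 * the RMP already assigns to the guest as private. A sketch for one page
 * going shared:
 *
 *	set_pages_state(vaddr, 1, SNP_PAGE_STATE_SHARED)
 *		-> pvalidate_pages()			(rescind validation)
 *		-> vmgexit_psc() or MSR protocol	(RMP: private -> shared)
 */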

void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
}

void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}

void snp_accept_memory(phys_addr_t start, phys_addr_t end)
{
	unsigned long vaddr, npages;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	vaddr = (unsigned long)__va(start);
	npages = (end - start) >> PAGE_SHIFT;

	set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}

static void set_pte_enc(pte_t *kpte, int level, void *va)
{
	struct pte_enc_desc d = {
		.kpte = kpte,
		.pte_level = level,
		.va = va,
		.encrypt = true
	};

	prepare_pte_enc(&d);
	set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
}

static void unshare_all_memory(void)
{
	unsigned long addr, end, size, ghcb;
	struct sev_es_runtime_data *data;
	unsigned int npages, level;
	bool skipped_addr;
	pte_t *pte;
	int cpu;

	/* Unshare the direct mapping. */
	addr = PAGE_OFFSET;
	end = PAGE_OFFSET + get_max_mapped();

	while (addr < end) {
		pte = lookup_address(addr, &level);
		size = page_level_size(level);
		npages = size / PAGE_SIZE;
		skipped_addr = false;

		if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
			addr += size;
			continue;
		}

		/*
		 * Ensure that all the per-CPU GHCBs are made private at the
		 * end of the unsharing loop so that the switch to the slower
		 * MSR protocol happens last.
		 */
		for_each_possible_cpu(cpu) {
			data = per_cpu(runtime_data, cpu);
			ghcb = (unsigned long)&data->ghcb_page;

			if (addr <= ghcb && ghcb <= addr + size) {
				skipped_addr = true;
				break;
			}
		}

		if (!skipped_addr) {
			set_pte_enc(pte, level, (void *)addr);
			snp_set_memory_private(addr, npages);
		}
		addr += size;
	}

	/* Unshare all bss decrypted memory. */
	addr = (unsigned long)__start_bss_decrypted;
	end = (unsigned long)__start_bss_decrypted_unused;
	npages = (end - addr) >> PAGE_SHIFT;

	for (; addr < end; addr += PAGE_SIZE) {
		pte = lookup_address(addr, &level);
		if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
			continue;

		set_pte_enc(pte, level, (void *)addr);
	}
	addr = (unsigned long)__start_bss_decrypted;
	snp_set_memory_private(addr, npages);

	__flush_tlb_all();
}

/* Stop new private<->shared conversions */
void snp_kexec_begin(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	/*
	 * Crash kernel ends up here with interrupts disabled: can't wait for
	 * conversions to finish.
	 *
	 * If race happened, just report and proceed.
	 */
	if (!set_memory_enc_stop_conversion())
		pr_warn("Failed to stop shared<->private conversions\n");
}

void snp_kexec_finish(void)
{
	struct sev_es_runtime_data *data;
	unsigned int level, cpu;
	unsigned long size;
	struct ghcb *ghcb;
	pte_t *pte;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	unshare_all_memory();

	/*
	 * Switch to using the MSR protocol to change per-CPU GHCBs to
	 * private. All the per-CPU GHCBs have been switched back to private,
	 * so can't do any more GHCB calls to the hypervisor beyond this point
	 * until the kexec'ed kernel starts running.
	 */
	boot_ghcb = NULL;
	sev_cfg.ghcbs_initialized = false;

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);
		ghcb = &data->ghcb_page;
		pte = lookup_address((unsigned long)ghcb, &level);
		size = page_level_size(level);
		set_pte_enc(pte, level, (void *)ghcb);
		snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
	}
}

static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
{
	int ret;

	if (snp_vmpl) {
		struct svsm_call call = {};
		unsigned long flags;

		local_irq_save(flags);

		call.caa = this_cpu_read(svsm_caa);
		call.rcx = __pa(va);

		if (make_vmsa) {
			/* Protocol 0, Call ID 2 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
			call.rdx = __pa(caa);
			call.r8 = apic_id;
		} else {
			/* Protocol 0, Call ID 3 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
		}

		ret = svsm_perform_call_protocol(&call);

		local_irq_restore(flags);
	} else {
		/*
		 * If the kernel runs at VMPL0, it can change the VMSA
		 * bit for a page using the RMPADJUST instruction.
		 * However, for the instruction to succeed it must
		 * target the permissions of a lesser privileged (higher
		 * numbered) VMPL level, so use VMPL1.
		 */
		u64 attrs = 1;

		if (make_vmsa)
			attrs |= RMPADJUST_VMSA_PAGE_BIT;

		ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
	}

	return ret;
}

#define __ATTR_BASE		(SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
#define INIT_CS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
#define INIT_DS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)

#define INIT_LDTR_ATTRIBS	(SVM_SELECTOR_P_MASK | 2)
#define INIT_TR_ATTRIBS		(SVM_SELECTOR_P_MASK | 3)

static void *snp_alloc_vmsa_page(int cpu)
{
	struct page *p;

	/*
	 * Allocate VMSA page to work around the SNP erratum where the CPU will
	 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
	 * collides with the RMP entry of the VMSA page. The recommended
	 * workaround is to not use a large page.
	 *
	 * Allocate an 8k page which is also 8k-aligned.
	 */
	p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
	if (!p)
		return NULL;

	split_page(p, 1);

	/* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
	__free_page(p);

	return page_address(p + 1);
}
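
/*
 * Concrete illustration of the allocation trick above (addresses are made
 * up): an order-1, 8k-aligned allocation might return the pages 0x40200000
 * and 0x40201000. The first page can be 2M (or 1G) aligned and is freed; the
 * second one is handed out as the VMSA page, so the VMSA can never sit on a
 * large-page boundary and the RMP/large-page collision described in the
 * erratum is avoided.
 */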

static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
{
	int err;

	err = snp_set_vmsa(vmsa, NULL, apic_id, false);
	if (err)
		pr_err("clear VMSA page failed (%u), leaking page\n", err);
	else
		free_page((unsigned long)vmsa);
}

static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
{
	struct sev_es_save_area *cur_vmsa, *vmsa;
	struct ghcb_state state;
	struct svsm_ca *caa;
	unsigned long flags;
	struct ghcb *ghcb;
	u8 sipi_vector;
	int cpu, ret;
	u64 cr4;

	/*
	 * The hypervisor SNP feature support check has happened earlier, just check
	 * the AP_CREATION one here.
	 */
	if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
		return -EOPNOTSUPP;

	/*
	 * Verify the desired start IP against the known trampoline start IP
	 * to catch any future new trampolines that may be introduced that
	 * would require a new protected guest entry point.
	 */
	if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
		      "Unsupported SNP start_ip: %lx\n", start_ip))
		return -EINVAL;

	/* Override start_ip with known protected guest start IP */
	start_ip = real_mode_header->sev_es_trampoline_start;

	/* Find the logical CPU for the APIC ID */
	for_each_present_cpu(cpu) {
		if (arch_match_cpu_phys_id(cpu, apic_id))
			break;
	}
	if (cpu >= nr_cpu_ids)
		return -EINVAL;

	cur_vmsa = per_cpu(sev_vmsa, cpu);

	/*
	 * A new VMSA is created each time because there is no guarantee that
	 * the current VMSA is the kernel's or that the vCPU is not running. If
	 * an attempt was done to use the current VMSA with a running vCPU, a
	 * #VMEXIT of that vCPU would wipe out all of the settings being done
	 * here.
	 */
	vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu);
	if (!vmsa)
		return -ENOMEM;

	/* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
	caa = per_cpu(svsm_caa, cpu);

	/* CR4 should maintain the MCE value */
	cr4 = native_read_cr4() & X86_CR4_MCE;

	/* Set the CS value based on the start_ip converted to a SIPI vector */
	sipi_vector = (start_ip >> 12);
	vmsa->cs.base = sipi_vector << 12;
	vmsa->cs.limit = AP_INIT_CS_LIMIT;
	vmsa->cs.attrib = INIT_CS_ATTRIBS;
	vmsa->cs.selector = sipi_vector << 8;

	/* Set the RIP value based on start_ip */
	vmsa->rip = start_ip & 0xfff;

	/* Set AP INIT defaults as documented in the APM */
	vmsa->ds.limit = AP_INIT_DS_LIMIT;
	vmsa->ds.attrib = INIT_DS_ATTRIBS;
	vmsa->es = vmsa->ds;
	vmsa->fs = vmsa->ds;
	vmsa->gs = vmsa->ds;
	vmsa->ss = vmsa->ds;

	vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT;
	vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT;
	vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
	vmsa->idtr.limit = AP_INIT_IDTR_LIMIT;
	vmsa->tr.limit = AP_INIT_TR_LIMIT;
	vmsa->tr.attrib = INIT_TR_ATTRIBS;

	vmsa->cr4 = cr4;
	vmsa->cr0 = AP_INIT_CR0_DEFAULT;
	vmsa->dr7 = DR7_RESET_VALUE;
	vmsa->dr6 = AP_INIT_DR6_DEFAULT;
	vmsa->rflags = AP_INIT_RFLAGS_DEFAULT;
	vmsa->g_pat = AP_INIT_GPAT_DEFAULT;
	vmsa->xcr0 = AP_INIT_XCR0_DEFAULT;
	vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT;
	vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
	vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;

	/* SVME must be set. */
	vmsa->efer = EFER_SVME;

	/*
	 * Set the SNP-specific fields for this VMSA:
	 *   VMPL level
	 *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
	 */
	vmsa->vmpl = snp_vmpl;
	vmsa->sev_features = sev_status >> 2;

	/* Populate AP's TSC scale/offset to get accurate TSC values. */
	if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) {
		vmsa->tsc_scale = snp_tsc_scale;
		vmsa->tsc_offset = snp_tsc_offset;
	}

	/* Switch the page over to a VMSA page now that it is initialized */
	ret = snp_set_vmsa(vmsa, caa, apic_id, true);
	if (ret) {
		pr_err("set VMSA page failed (%u)\n", ret);
		free_page((unsigned long)vmsa);

		return -EINVAL;
	}

	/* Issue VMGEXIT AP Creation NAE event */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_rax(ghcb, vmsa->sev_features);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
	ghcb_set_sw_exit_info_1(ghcb,
				((u64)apic_id << 32) |
				((u64)snp_vmpl << 16) |
				SVM_VMGEXIT_AP_CREATE);
	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
		pr_err("SNP AP Creation error\n");
		ret = -EINVAL;
	}

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	/* Perform cleanup if there was an error */
	if (ret) {
		snp_cleanup_vmsa(vmsa, apic_id);
		vmsa = NULL;
	}

	/* Free up any previous VMSA page */
	if (cur_vmsa)
		snp_cleanup_vmsa(cur_vmsa, apic_id);

	/* Record the current VMSA page */
	per_cpu(sev_vmsa, cpu) = vmsa;

	return ret;
}
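
/*
 * Worked example for the CS/RIP setup above, with a made-up trampoline
 * address: if start_ip (sev_es_trampoline_start) were 0x9d000, then
 *
 *	sipi_vector       = 0x9d000 >> 12   = 0x9d
 *	vmsa->cs.base     = 0x9d << 12      = 0x9d000
 *	vmsa->cs.selector = 0x9d << 8       = 0x9d00
 *	vmsa->rip         = 0x9d000 & 0xfff = 0x0
 *
 * which matches what a real INIT-SIPI-SIPI sequence with vector 0x9d would
 * have produced in real mode (CS:IP = 0x9d00:0x0000).
 */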

void __init snp_set_wakeup_secondary_cpu(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	/*
	 * Always set this override if SNP is enabled. This makes it the
	 * required method to start APs under SNP. If the hypervisor does
	 * not support AP creation, then no APs will be started.
	 */
	apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
}

int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
{
	u16 startup_cs, startup_ip;
	phys_addr_t jump_table_pa;
	u64 jump_table_addr;
	u16 __iomem *jump_table;

	jump_table_addr = get_jump_table_addr();

	/* On UP guests there is no jump table so this is not a failure */
	if (!jump_table_addr)
		return 0;

	/* Check if AP Jump Table is page-aligned */
	if (jump_table_addr & ~PAGE_MASK)
		return -EINVAL;

	jump_table_pa = jump_table_addr & PAGE_MASK;

	startup_cs = (u16)(rmh->trampoline_start >> 4);
	startup_ip = (u16)(rmh->sev_es_trampoline_start -
			   rmh->trampoline_start);

	jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
	if (!jump_table)
		return -EIO;

	writew(startup_ip, &jump_table[0]);
	writew(startup_cs, &jump_table[1]);

	iounmap(jump_table);

	return 0;
}

/*
 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
 */
int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
{
	struct sev_es_runtime_data *data;
	unsigned long address, pflags;
	int cpu;
	u64 pfn;

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return 0;

	pflags = _PAGE_NX | _PAGE_RW;

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);

		address = __pa(&data->ghcb_page);
		pfn = address >> PAGE_SHIFT;

		if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
			return 1;
	}

	return 0;
}

/* Writes to the SVSM CAA MSR are ignored */
static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write)
{
	if (write)
		return ES_OK;

	regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
	regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));

	return ES_OK;
}

/*
 * TSC related accesses should not exit to the hypervisor when a guest is
 * executing with Secure TSC enabled, so special handling is required for
 * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ.
 */
static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write)
{
	u64 tsc;

	/*
	 * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled.
	 * Terminate the SNP guest when the interception is enabled.
	 */
	if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ)
		return ES_VMM_ERROR;

	/*
	 * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC
	 *         to return undefined values, so ignore all writes.
	 *
	 * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use
	 *        the value returned by rdtsc_ordered().
	 */
	if (write) {
		WARN_ONCE(1, "TSC MSR writes are verboten!\n");
		return ES_OK;
	}

	tsc = rdtsc_ordered();
	regs->ax = lower_32_bits(tsc);
	regs->dx = upper_32_bits(tsc);

	return ES_OK;
}

static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	enum es_result ret;
	bool write;

	/* Is it a WRMSR? */
	write = ctxt->insn.opcode.bytes[1] == 0x30;

	switch (regs->cx) {
	case MSR_SVSM_CAA:
		return __vc_handle_msr_caa(regs, write);
	case MSR_IA32_TSC:
	case MSR_AMD64_GUEST_TSC_FREQ:
		if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
			return __vc_handle_secure_tsc_msrs(regs, write);
		break;
	default:
		break;
	}

	ghcb_set_rcx(ghcb, regs->cx);
	if (write) {
		ghcb_set_rax(ghcb, regs->ax);
		ghcb_set_rdx(ghcb, regs->dx);
	}

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0);

	if ((ret == ES_OK) && !write) {
		regs->ax = ghcb->save.rax;
		regs->dx = ghcb->save.rdx;
	}

	return ret;
}
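
/*
 * Note on the opcode check above: RDMSR and WRMSR are both two-byte opcodes
 * starting with 0x0f, with byte 1 being 0x32 for RDMSR and 0x30 for WRMSR,
 * so testing opcode.bytes[1] == 0x30 is enough to tell a write from a read
 * once the instruction has been decoded.
 */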

static void snp_register_per_cpu_ghcb(void)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	snp_register_ghcb_early(__pa(ghcb));
}

void setup_ghcb(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	/*
	 * Check whether the runtime #VC exception handler is active. It uses
	 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
	 *
	 * If SNP is active, register the per-CPU GHCB page so that the runtime
	 * exception handler can use it.
	 */
	if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
			snp_register_per_cpu_ghcb();

		sev_cfg.ghcbs_initialized = true;

		return;
	}

	/*
	 * Make sure the hypervisor talks a supported protocol.
	 * This gets called only in the BSP boot phase.
	 */
	if (!sev_es_negotiate_protocol())
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);

	/*
	 * Clear the boot_ghcb. The first exception comes in before the bss
	 * section is cleared.
	 */
	memset(&boot_ghcb_page, 0, PAGE_SIZE);

	/* Alright - Make the boot-ghcb public */
	boot_ghcb = &boot_ghcb_page;

	/* SNP guest requires that GHCB GPA must be registered. */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		snp_register_ghcb_early(__pa(&boot_ghcb_page));
}

#ifdef CONFIG_HOTPLUG_CPU
static void sev_es_ap_hlt_loop(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	while (true) {
		vc_ghcb_invalidate(ghcb);
		ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
		ghcb_set_sw_exit_info_1(ghcb, 0);
		ghcb_set_sw_exit_info_2(ghcb, 0);

		sev_es_wr_ghcb_msr(__pa(ghcb));
		VMGEXIT();

		/* Wakeup signal? */
		if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
		    ghcb->save.sw_exit_info_2)
			break;
	}

	__sev_put_ghcb(&state);
}

/*
 * Play_dead handler when running under SEV-ES. This is needed because
 * the hypervisor can't deliver an SIPI request to restart the AP.
 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
 * hypervisor wakes it up again.
 */
static void sev_es_play_dead(void)
{
	play_dead_common();

	/* IRQs now disabled */

	sev_es_ap_hlt_loop();

	/*
	 * If we get here, the VCPU was woken up again. Jump to CPU
	 * startup code to get it back online.
	 */
	soft_restart_cpu();
}
#else  /* CONFIG_HOTPLUG_CPU */
#define sev_es_play_dead	native_play_dead
#endif /* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_SMP
static void __init sev_es_setup_play_dead(void)
{
	smp_ops.play_dead = sev_es_play_dead;
}
#else
static inline void sev_es_setup_play_dead(void) { }
#endif

static void __init alloc_runtime_data(int cpu)
{
	struct sev_es_runtime_data *data;

	data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
	if (!data)
		panic("Can't allocate SEV-ES runtime data");

	per_cpu(runtime_data, cpu) = data;

	if (snp_vmpl) {
		struct svsm_ca *caa;

		/* Allocate the SVSM CA page if an SVSM is present */
		caa = memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE);

		per_cpu(svsm_caa, cpu) = caa;
		per_cpu(svsm_caa_pa, cpu) = __pa(caa);
	}
}

static void __init init_ghcb(int cpu)
{
	struct sev_es_runtime_data *data;
	int err;

	data = per_cpu(runtime_data, cpu);

	err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
					 sizeof(data->ghcb_page));
	if (err)
		panic("Can't map GHCBs unencrypted");

	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));

	data->ghcb_active = false;
	data->backup_ghcb_active = false;
}

void __init sev_es_init_vc_handling(void)
{
	int cpu;

	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	if (!sev_es_check_cpu_features())
		panic("SEV-ES CPU Features missing");

	/*
	 * SNP is supported in v2 of the GHCB spec which mandates support for HV
	 * features.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
		sev_hv_features = get_hv_features();

		if (!(sev_hv_features & GHCB_HV_FT_SNP))
			sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
	}

	/* Initialize per-cpu GHCB pages */
	for_each_possible_cpu(cpu) {
		alloc_runtime_data(cpu);
		init_ghcb(cpu);
	}

	/* If running under an SVSM, switch to the per-cpu CA */
	if (snp_vmpl) {
		struct svsm_call call = {};
		unsigned long flags;
		int ret;

		local_irq_save(flags);

		/*
		 * SVSM_CORE_REMAP_CA call:
		 *   RAX = 0 (Protocol=0, CallID=0)
		 *   RCX = New CA GPA
		 */
		call.caa = svsm_get_caa();
		call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
		call.rcx = this_cpu_read(svsm_caa_pa);
		ret = svsm_perform_call_protocol(&call);
		if (ret)
			panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n",
			      ret, call.rax_out);

		sev_cfg.use_cas = true;

		local_irq_restore(flags);
	}

	sev_es_setup_play_dead();

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}

static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
	int trapnr = ctxt->fi.vector;

	if (trapnr == X86_TRAP_PF)
		native_write_cr2(ctxt->fi.cr2);

	ctxt->regs->orig_ax = ctxt->fi.error_code;
	do_early_exception(ctxt->regs, trapnr);
}

static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
{
	long *reg_array;
	int offset;

	reg_array = (long *)ctxt->regs;
	offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);

	if (offset < 0)
		return NULL;

	offset /= sizeof(long);

	return reg_array + offset;
}

static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 unsigned int bytes, bool read)
{
	u64 exit_code, exit_info_1, exit_info_2;
	unsigned long ghcb_pa = __pa(ghcb);
	enum es_result res;
	phys_addr_t paddr;
	void __user *ref;

	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
	if (ref == (void __user *)-1L)
		return ES_UNSUPPORTED;

	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;

	res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
	if (res != ES_OK) {
		if (res == ES_EXCEPTION && !read)
			ctxt->fi.error_code |= X86_PF_WRITE;

		return res;
	}

	exit_info_1 = paddr;
	/* Can never be greater than 8 */
	exit_info_2 = bytes;

	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));

	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}

/*
 * The MOVS instruction has two memory operands, which raises the
 * problem that it is not known whether the access to the source or the
 * destination caused the #VC exception (and hence whether an MMIO read
 * or write operation needs to be emulated).
 *
 * Instead of playing games with walking page-tables and trying to guess
 * whether the source or destination is an MMIO range, split the move
 * into two operations, a read and a write with only one memory operand.
 * This will cause a nested #VC exception on the MMIO address which can
 * then be handled.
 *
 * This implementation has the benefit that it also supports MOVS where
 * source _and_ destination are MMIO regions.
 *
 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
 * rare operation. If it turns out to be a performance problem the split
 * operations can be moved to memcpy_fromio() and memcpy_toio().
 */
static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
					  unsigned int bytes)
{
	unsigned long ds_base, es_base;
	unsigned char *src, *dst;
	unsigned char buffer[8];
	enum es_result ret;
	bool rep;
	int off;

	ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
	es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

	if (ds_base == -1L || es_base == -1L) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	src = ds_base + (unsigned char *)ctxt->regs->si;
	dst = es_base + (unsigned char *)ctxt->regs->di;

	ret = vc_read_mem(ctxt, src, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	ret = vc_write_mem(ctxt, dst, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	if (ctxt->regs->flags & X86_EFLAGS_DF)
		off = -bytes;
	else
		off = bytes;

	ctxt->regs->si += off;
	ctxt->regs->di += off;

	rep = insn_has_rep_prefix(&ctxt->insn);
	if (rep)
		ctxt->regs->cx -= 1;

	if (!rep || ctxt->regs->cx == 0)
		return ES_OK;
	else
		return ES_RETRY;
}
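
/*
 * Example of how the function above iterates: a "rep movsb" with rcx == 3
 * and an MMIO destination is emulated one byte per #VC. Each pass reads one
 * byte, writes it (taking the nested #VC for the MMIO side), advances
 * rsi/rdi by +1 (or -1 if EFLAGS.DF is set), decrements rcx and returns
 * ES_RETRY. ES_RETRY leaves RIP untouched, so the guest re-executes the same
 * MOVS and traps again until rcx reaches zero, at which point ES_OK lets
 * vc_finish_insn() finally step past the instruction.
 */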

static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct insn *insn = &ctxt->insn;
	enum insn_mmio_type mmio;
	unsigned int bytes = 0;
	enum es_result ret;
	u8 sign_byte;
	long *reg_data;

	mmio = insn_decode_mmio(insn, &bytes);
	if (mmio == INSN_MMIO_DECODE_FAILED)
		return ES_DECODE_FAILED;

	if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
		reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
		if (!reg_data)
			return ES_DECODE_FAILED;
	}

	if (user_mode(ctxt->regs))
		return ES_UNSUPPORTED;

	switch (mmio) {
	case INSN_MMIO_WRITE:
		memcpy(ghcb->shared_buffer, reg_data, bytes);
		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;
	case INSN_MMIO_WRITE_IMM:
		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;
	case INSN_MMIO_READ:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero-extend for 32-bit operation */
		if (bytes == 4)
			*reg_data = 0;

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_READ_ZERO_EXTEND:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero extend based on operand size */
		memset(reg_data, 0, insn->opnd_bytes);
		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_READ_SIGN_EXTEND:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		if (bytes == 1) {
			u8 *val = (u8 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x80) ? 0xff : 0x00;
		} else {
			u16 *val = (u16 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
		}

		/* Sign extend based on operand size */
		memset(reg_data, sign_byte, insn->opnd_bytes);
		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_MOVS:
		ret = vc_handle_mmio_movs(ctxt, bytes);
		break;
	default:
		ret = ES_UNSUPPORTED;
		break;
	}

	return ret;
}

static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
					  struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long val, *reg = vc_insn_get_rm(ctxt);
	enum es_result ret;

	if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
		return ES_VMM_ERROR;

	if (!reg)
		return ES_DECODE_FAILED;

	val = *reg;

	/* Upper 32 bits must be written as zeroes */
	if (val >> 32) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	/* Clear out other reserved bits and set bit 10 */
	val = (val & 0xffff23ffL) | BIT(10);

	/* Early non-zero writes to DR7 are not supported */
	if (!data && (val & ~DR7_RESET_VALUE))
		return ES_UNSUPPORTED;

	/* Using a value of 0 for ExitInfo1 means RAX holds the value */
	ghcb_set_rax(ghcb, val);
	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (data)
		data->dr7 = val;

	return ES_OK;
}

static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
					 struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long *reg = vc_insn_get_rm(ctxt);

	if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
		return ES_VMM_ERROR;

	if (!reg)
		return ES_DECODE_FAILED;

	if (data)
		*reg = data->dr7;
	else
		*reg = DR7_RESET_VALUE;

	return ES_OK;
}
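
/*
 * Taken together, the two handlers above give the guest a purely software
 * view of DR7: a write is sanitized, forwarded via SVM_EXIT_WRITE_DR7 and
 * cached in the per-CPU runtime data, and a later read simply returns that
 * cached value (or DR7_RESET_VALUE, 0x400, before the per-CPU data exists).
 * The real hardware DR7 is never written here. When the DebugSwap feature is
 * active these intercepts are not expected at all, so both handlers bail out
 * with ES_VMM_ERROR.
 */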
3 : 0); 2032 2033 if (x86_platform.hyper.sev_es_hcall_prepare) 2034 x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs); 2035 2036 ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0); 2037 if (ret != ES_OK) 2038 return ret; 2039 2040 if (!ghcb_rax_is_valid(ghcb)) 2041 return ES_VMM_ERROR; 2042 2043 ctxt->regs->ax = ghcb->save.rax; 2044 2045 /* 2046 * Call sev_es_hcall_finish() after regs->ax is already set. 2047 * This allows the hypervisor handler to overwrite it again if 2048 * necessary. 2049 */ 2050 if (x86_platform.hyper.sev_es_hcall_finish && 2051 !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs)) 2052 return ES_VMM_ERROR; 2053 2054 return ES_OK; 2055 } 2056 2057 static enum es_result vc_handle_trap_ac(struct ghcb *ghcb, 2058 struct es_em_ctxt *ctxt) 2059 { 2060 /* 2061 * Calling exc_alignment_check() directly does not work, because it 2062 * enables IRQs and the GHCB is active. Forward the exception and call 2063 * it later from vc_forward_exception(). 2064 */ 2065 ctxt->fi.vector = X86_TRAP_AC; 2066 ctxt->fi.error_code = 0; 2067 return ES_EXCEPTION; 2068 } 2069 2070 static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, 2071 struct ghcb *ghcb, 2072 unsigned long exit_code) 2073 { 2074 enum es_result result = vc_check_opcode_bytes(ctxt, exit_code); 2075 2076 if (result != ES_OK) 2077 return result; 2078 2079 switch (exit_code) { 2080 case SVM_EXIT_READ_DR7: 2081 result = vc_handle_dr7_read(ghcb, ctxt); 2082 break; 2083 case SVM_EXIT_WRITE_DR7: 2084 result = vc_handle_dr7_write(ghcb, ctxt); 2085 break; 2086 case SVM_EXIT_EXCP_BASE + X86_TRAP_AC: 2087 result = vc_handle_trap_ac(ghcb, ctxt); 2088 break; 2089 case SVM_EXIT_RDTSC: 2090 case SVM_EXIT_RDTSCP: 2091 result = vc_handle_rdtsc(ghcb, ctxt, exit_code); 2092 break; 2093 case SVM_EXIT_RDPMC: 2094 result = vc_handle_rdpmc(ghcb, ctxt); 2095 break; 2096 case SVM_EXIT_INVD: 2097 pr_err_ratelimited("#VC exception for INVD??? Seriously???\n"); 2098 result = ES_UNSUPPORTED; 2099 break; 2100 case SVM_EXIT_CPUID: 2101 result = vc_handle_cpuid(ghcb, ctxt); 2102 break; 2103 case SVM_EXIT_IOIO: 2104 result = vc_handle_ioio(ghcb, ctxt); 2105 break; 2106 case SVM_EXIT_MSR: 2107 result = vc_handle_msr(ghcb, ctxt); 2108 break; 2109 case SVM_EXIT_VMMCALL: 2110 result = vc_handle_vmmcall(ghcb, ctxt); 2111 break; 2112 case SVM_EXIT_WBINVD: 2113 result = vc_handle_wbinvd(ghcb, ctxt); 2114 break; 2115 case SVM_EXIT_MONITOR: 2116 result = vc_handle_monitor(ghcb, ctxt); 2117 break; 2118 case SVM_EXIT_MWAIT: 2119 result = vc_handle_mwait(ghcb, ctxt); 2120 break; 2121 case SVM_EXIT_NPF: 2122 result = vc_handle_mmio(ghcb, ctxt); 2123 break; 2124 default: 2125 /* 2126 * Unexpected #VC exception 2127 */ 2128 result = ES_UNSUPPORTED; 2129 } 2130 2131 return result; 2132 } 2133 2134 static __always_inline bool is_vc2_stack(unsigned long sp) 2135 { 2136 return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); 2137 } 2138 2139 static __always_inline bool vc_from_invalid_context(struct pt_regs *regs) 2140 { 2141 unsigned long sp, prev_sp; 2142 2143 sp = (unsigned long)regs; 2144 prev_sp = regs->sp; 2145 2146 /* 2147 * If the code was already executing on the VC2 stack when the #VC 2148 * happened, let it proceed to the normal handling routine. This way the 2149 * code executing on the VC2 stack can cause #VC exceptions to get handled.
2150 */ 2151 return is_vc2_stack(sp) && !is_vc2_stack(prev_sp); 2152 } 2153 2154 static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) 2155 { 2156 struct ghcb_state state; 2157 struct es_em_ctxt ctxt; 2158 enum es_result result; 2159 struct ghcb *ghcb; 2160 bool ret = true; 2161 2162 ghcb = __sev_get_ghcb(&state); 2163 2164 vc_ghcb_invalidate(ghcb); 2165 result = vc_init_em_ctxt(&ctxt, regs, error_code); 2166 2167 if (result == ES_OK) 2168 result = vc_handle_exitcode(&ctxt, ghcb, error_code); 2169 2170 __sev_put_ghcb(&state); 2171 2172 /* Done - now check the result */ 2173 switch (result) { 2174 case ES_OK: 2175 vc_finish_insn(&ctxt); 2176 break; 2177 case ES_UNSUPPORTED: 2178 pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n", 2179 error_code, regs->ip); 2180 ret = false; 2181 break; 2182 case ES_VMM_ERROR: 2183 pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", 2184 error_code, regs->ip); 2185 ret = false; 2186 break; 2187 case ES_DECODE_FAILED: 2188 pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", 2189 error_code, regs->ip); 2190 ret = false; 2191 break; 2192 case ES_EXCEPTION: 2193 vc_forward_exception(&ctxt); 2194 break; 2195 case ES_RETRY: 2196 /* Nothing to do */ 2197 break; 2198 default: 2199 pr_emerg("Unknown result in %s():%d\n", __func__, result); 2200 /* 2201 * Emulating the instruction which caused the #VC exception 2202 * failed - can't continue so print debug information 2203 */ 2204 BUG(); 2205 } 2206 2207 return ret; 2208 } 2209 2210 static __always_inline bool vc_is_db(unsigned long error_code) 2211 { 2212 return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB; 2213 } 2214 2215 /* 2216 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode 2217 * and will panic when an error happens. 2218 */ 2219 DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) 2220 { 2221 irqentry_state_t irq_state; 2222 2223 /* 2224 * With the current implementation it is always possible to switch to a 2225 * safe stack because #VC exceptions only happen at known places, like 2226 * intercepted instructions or accesses to MMIO areas/IO ports. They can 2227 * also happen with code instrumentation when the hypervisor intercepts 2228 * #DB, but the critical paths are forbidden to be instrumented, so #DB 2229 * exceptions currently also only happen in safe places. 2230 * 2231 * But keep this here in case the noinstr annotations are violated due 2232 * to bug elsewhere. 2233 */ 2234 if (unlikely(vc_from_invalid_context(regs))) { 2235 instrumentation_begin(); 2236 panic("Can't handle #VC exception from unsupported context\n"); 2237 instrumentation_end(); 2238 } 2239 2240 /* 2241 * Handle #DB before calling into !noinstr code to avoid recursive #DB. 2242 */ 2243 if (vc_is_db(error_code)) { 2244 exc_debug(regs); 2245 return; 2246 } 2247 2248 irq_state = irqentry_nmi_enter(regs); 2249 2250 instrumentation_begin(); 2251 2252 if (!vc_raw_handle_exception(regs, error_code)) { 2253 /* Show some debug info */ 2254 show_regs(regs); 2255 2256 /* Ask hypervisor to sev_es_terminate */ 2257 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 2258 2259 /* If that fails and we get here - just panic */ 2260 panic("Returned from Terminate-Request to Hypervisor\n"); 2261 } 2262 2263 instrumentation_end(); 2264 irqentry_nmi_exit(regs, irq_state); 2265 } 2266 2267 /* 2268 * Runtime #VC exception handler when raised from user mode. 
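User space can trigger it, for example, by executing an intercepted instruction such as CPUID.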
Runs in IRQ mode 2269 * and will kill the current task with SIGBUS when an error happens. 2270 */ 2271 DEFINE_IDTENTRY_VC_USER(exc_vmm_communication) 2272 { 2273 /* 2274 * Handle #DB before calling into !noinstr code to avoid recursive #DB. 2275 */ 2276 if (vc_is_db(error_code)) { 2277 noist_exc_debug(regs); 2278 return; 2279 } 2280 2281 irqentry_enter_from_user_mode(regs); 2282 instrumentation_begin(); 2283 2284 if (!vc_raw_handle_exception(regs, error_code)) { 2285 /* 2286 * Do not kill the machine if user-space triggered the 2287 * exception. Send SIGBUS instead and let user-space deal with 2288 * it. 2289 */ 2290 force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); 2291 } 2292 2293 instrumentation_end(); 2294 irqentry_exit_to_user_mode(regs); 2295 } 2296 2297 bool __init handle_vc_boot_ghcb(struct pt_regs *regs) 2298 { 2299 unsigned long exit_code = regs->orig_ax; 2300 struct es_em_ctxt ctxt; 2301 enum es_result result; 2302 2303 vc_ghcb_invalidate(boot_ghcb); 2304 2305 result = vc_init_em_ctxt(&ctxt, regs, exit_code); 2306 if (result == ES_OK) 2307 result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code); 2308 2309 /* Done - now check the result */ 2310 switch (result) { 2311 case ES_OK: 2312 vc_finish_insn(&ctxt); 2313 break; 2314 case ES_UNSUPPORTED: 2315 early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", 2316 exit_code, regs->ip); 2317 goto fail; 2318 case ES_VMM_ERROR: 2319 early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", 2320 exit_code, regs->ip); 2321 goto fail; 2322 case ES_DECODE_FAILED: 2323 early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", 2324 exit_code, regs->ip); 2325 goto fail; 2326 case ES_EXCEPTION: 2327 vc_early_forward_exception(&ctxt); 2328 break; 2329 case ES_RETRY: 2330 /* Nothing to do */ 2331 break; 2332 default: 2333 BUG(); 2334 } 2335 2336 return true; 2337 2338 fail: 2339 show_regs(regs); 2340 2341 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 2342 } 2343 2344 /* 2345 * Initial set up of SNP relies on information provided by the 2346 * Confidential Computing blob, which can be passed to the kernel 2347 * in the following ways, depending on how it is booted: 2348 * 2349 * - when booted via the boot/decompress kernel: 2350 * - via boot_params 2351 * 2352 * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH): 2353 * - via a setup_data entry, as defined by the Linux Boot Protocol 2354 * 2355 * Scan for the blob in that order. 2356 */ 2357 static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) 2358 { 2359 struct cc_blob_sev_info *cc_info; 2360 2361 /* Boot kernel would have passed the CC blob via boot_params. */ 2362 if (bp->cc_blob_address) { 2363 cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address; 2364 goto found_cc_info; 2365 } 2366 2367 /* 2368 * If kernel was booted directly, without the use of the 2369 * boot/decompression kernel, the CC blob may have been passed via 2370 * setup_data instead. 2371 */ 2372 cc_info = find_cc_blob_setup_data(bp); 2373 if (!cc_info) 2374 return NULL; 2375 2376 found_cc_info: 2377 if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) 2378 snp_abort(); 2379 2380 return cc_info; 2381 } 2382 2383 static __head void svsm_setup(struct cc_blob_sev_info *cc_info) 2384 { 2385 struct svsm_call call = {}; 2386 int ret; 2387 u64 pa; 2388 2389 /* 2390 * Record the SVSM Calling Area address (CAA) if the guest is not 2391 * running at VMPL0. 
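In that case an SVSM running at VMPL0 handles privileged operations, such as page validation and VMSA updates, on the guest's behalf.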
The CA will be used to communicate with the 2392 * SVSM to perform the SVSM services. 2393 */ 2394 if (!svsm_setup_ca(cc_info)) 2395 return; 2396 2397 /* 2398 * It is very early in the boot and the kernel is running identity 2399 * mapped but without having adjusted the pagetables to where the 2400 * kernel was loaded (physbase), so get the CA address using 2401 * RIP-relative addressing. 2402 */ 2403 pa = (u64)&RIP_REL_REF(boot_svsm_ca_page); 2404 2405 /* 2406 * Switch over to the boot SVSM CA while the current CA is still 2407 * addressable. There is no GHCB at this point so use the MSR protocol. 2408 * 2409 * SVSM_CORE_REMAP_CA call: 2410 * RAX = 0 (Protocol=0, CallID=0) 2411 * RCX = New CA GPA 2412 */ 2413 call.caa = svsm_get_caa(); 2414 call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); 2415 call.rcx = pa; 2416 ret = svsm_perform_call_protocol(&call); 2417 if (ret) 2418 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL); 2419 2420 RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa; 2421 RIP_REL_REF(boot_svsm_caa_pa) = pa; 2422 } 2423 2424 bool __head snp_init(struct boot_params *bp) 2425 { 2426 struct cc_blob_sev_info *cc_info; 2427 2428 if (!bp) 2429 return false; 2430 2431 cc_info = find_cc_blob(bp); 2432 if (!cc_info) 2433 return false; 2434 2435 if (cc_info->secrets_phys && cc_info->secrets_len == PAGE_SIZE) 2436 secrets_pa = cc_info->secrets_phys; 2437 else 2438 return false; 2439 2440 setup_cpuid_table(cc_info); 2441 2442 svsm_setup(cc_info); 2443 2444 /* 2445 * The CC blob will be used later to access the secrets page. Cache 2446 * it here like the boot kernel does. 2447 */ 2448 bp->cc_blob_address = (u32)(unsigned long)cc_info; 2449 2450 return true; 2451 } 2452 2453 void __head __noreturn snp_abort(void) 2454 { 2455 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); 2456 } 2457 2458 /* 2459 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are 2460 * enabled, as the alternative (fallback) logic for DMI probing in the legacy 2461 * ROM region can cause a crash since this region is not pre-validated. 2462 */ 2463 void __init snp_dmi_setup(void) 2464 { 2465 if (efi_enabled(EFI_CONFIG_TABLES)) 2466 dmi_setup(); 2467 } 2468 2469 static void dump_cpuid_table(void) 2470 { 2471 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); 2472 int i = 0; 2473 2474 pr_info("count=%d reserved=0x%x reserved2=0x%llx\n", 2475 cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2); 2476 2477 for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) { 2478 const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; 2479 2480 pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n", 2481 i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx, 2482 fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved); 2483 } 2484 } 2485 2486 /* 2487 * It is useful from an auditing/testing perspective to provide an easy way 2488 * for the guest owner to know that the CPUID table has been initialized as 2489 * expected, but that initialization happens too early in boot to print any 2490 * sort of indicator, and there's not really any other good place to do it, 2491 * so do it here. 2492 * 2493 * If running as an SNP guest, report the current VM privilege level (VMPL).
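A non-zero VMPL indicates that the guest is running under an SVSM.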
2494 */ 2495 static int __init report_snp_info(void) 2496 { 2497 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); 2498 2499 if (cpuid_table->count) { 2500 pr_info("Using SNP CPUID table, %d entries present.\n", 2501 cpuid_table->count); 2502 2503 if (sev_cfg.debug) 2504 dump_cpuid_table(); 2505 } 2506 2507 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 2508 pr_info("SNP running at VMPL%u.\n", snp_vmpl); 2509 2510 return 0; 2511 } 2512 arch_initcall(report_snp_info); 2513 2514 static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input) 2515 { 2516 /* If (new) lengths have been returned, propagate them up */ 2517 if (call->rcx_out != call->rcx) 2518 input->manifest_buf.len = call->rcx_out; 2519 2520 if (call->rdx_out != call->rdx) 2521 input->certificates_buf.len = call->rdx_out; 2522 2523 if (call->r8_out != call->r8) 2524 input->report_buf.len = call->r8_out; 2525 } 2526 2527 int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, 2528 struct svsm_attest_call *input) 2529 { 2530 struct svsm_attest_call *ac; 2531 unsigned long flags; 2532 u64 attest_call_pa; 2533 int ret; 2534 2535 if (!snp_vmpl) 2536 return -EINVAL; 2537 2538 local_irq_save(flags); 2539 2540 call->caa = svsm_get_caa(); 2541 2542 ac = (struct svsm_attest_call *)call->caa->svsm_buffer; 2543 attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); 2544 2545 *ac = *input; 2546 2547 /* 2548 * Set input registers for the request and set RDX and R8 to known 2549 * values in order to detect length values being returned in them. 2550 */ 2551 call->rax = call_id; 2552 call->rcx = attest_call_pa; 2553 call->rdx = -1; 2554 call->r8 = -1; 2555 ret = svsm_perform_call_protocol(call); 2556 update_attest_input(call, input); 2557 2558 local_irq_restore(flags); 2559 2560 return ret; 2561 } 2562 EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req); 2563 2564 static int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input, 2565 struct snp_guest_request_ioctl *rio) 2566 { 2567 struct ghcb_state state; 2568 struct es_em_ctxt ctxt; 2569 unsigned long flags; 2570 struct ghcb *ghcb; 2571 int ret; 2572 2573 rio->exitinfo2 = SEV_RET_NO_FW_CALL; 2574 2575 /* 2576 * __sev_get_ghcb() needs to run with IRQs disabled because it is using 2577 * a per-CPU GHCB. 
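Keeping IRQs disabled also prevents the task from being migrated to another CPU while the GHCB is in use.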
2578 */ 2579 local_irq_save(flags); 2580 2581 ghcb = __sev_get_ghcb(&state); 2582 if (!ghcb) { 2583 ret = -EIO; 2584 goto e_restore_irq; 2585 } 2586 2587 vc_ghcb_invalidate(ghcb); 2588 2589 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { 2590 ghcb_set_rax(ghcb, input->data_gpa); 2591 ghcb_set_rbx(ghcb, input->data_npages); 2592 } 2593 2594 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa); 2595 if (ret) 2596 goto e_put; 2597 2598 rio->exitinfo2 = ghcb->save.sw_exit_info_2; 2599 switch (rio->exitinfo2) { 2600 case 0: 2601 break; 2602 2603 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY): 2604 ret = -EAGAIN; 2605 break; 2606 2607 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN): 2608 /* Number of expected pages are returned in RBX */ 2609 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { 2610 input->data_npages = ghcb_get_rbx(ghcb); 2611 ret = -ENOSPC; 2612 break; 2613 } 2614 fallthrough; 2615 default: 2616 ret = -EIO; 2617 break; 2618 } 2619 2620 e_put: 2621 __sev_put_ghcb(&state); 2622 e_restore_irq: 2623 local_irq_restore(flags); 2624 2625 return ret; 2626 } 2627 2628 static struct platform_device sev_guest_device = { 2629 .name = "sev-guest", 2630 .id = -1, 2631 }; 2632 2633 static int __init snp_init_platform_device(void) 2634 { 2635 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 2636 return -ENODEV; 2637 2638 if (platform_device_register(&sev_guest_device)) 2639 return -ENODEV; 2640 2641 pr_info("SNP guest platform device initialized.\n"); 2642 return 0; 2643 } 2644 device_initcall(snp_init_platform_device); 2645 2646 void sev_show_status(void) 2647 { 2648 int i; 2649 2650 pr_info("Status: "); 2651 for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) { 2652 if (sev_status & BIT_ULL(i)) { 2653 if (!sev_status_feat_names[i]) 2654 continue; 2655 2656 pr_cont("%s ", sev_status_feat_names[i]); 2657 } 2658 } 2659 pr_cont("\n"); 2660 } 2661 2662 void __init snp_update_svsm_ca(void) 2663 { 2664 if (!snp_vmpl) 2665 return; 2666 2667 /* Update the CAA to a proper kernel address */ 2668 boot_svsm_caa = &boot_svsm_ca_page; 2669 } 2670 2671 #ifdef CONFIG_SYSFS 2672 static ssize_t vmpl_show(struct kobject *kobj, 2673 struct kobj_attribute *attr, char *buf) 2674 { 2675 return sysfs_emit(buf, "%d\n", snp_vmpl); 2676 } 2677 2678 static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl); 2679 2680 static struct attribute *vmpl_attrs[] = { 2681 &vmpl_attr.attr, 2682 NULL 2683 }; 2684 2685 static struct attribute_group sev_attr_group = { 2686 .attrs = vmpl_attrs, 2687 }; 2688 2689 static int __init sev_sysfs_init(void) 2690 { 2691 struct kobject *sev_kobj; 2692 struct device *dev_root; 2693 int ret; 2694 2695 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 2696 return -ENODEV; 2697 2698 dev_root = bus_get_dev_root(&cpu_subsys); 2699 if (!dev_root) 2700 return -ENODEV; 2701 2702 sev_kobj = kobject_create_and_add("sev", &dev_root->kobj); 2703 put_device(dev_root); 2704 2705 if (!sev_kobj) 2706 return -ENOMEM; 2707 2708 ret = sysfs_create_group(sev_kobj, &sev_attr_group); 2709 if (ret) 2710 kobject_put(sev_kobj); 2711 2712 return ret; 2713 } 2714 arch_initcall(sev_sysfs_init); 2715 #endif // CONFIG_SYSFS 2716 2717 static void free_shared_pages(void *buf, size_t sz) 2718 { 2719 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; 2720 int ret; 2721 2722 if (!buf) 2723 return; 2724 2725 ret = set_memory_encrypted((unsigned long)buf, npages); 2726 if (ret) { 2727 WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); 2728 return; 2729 } 2730 2731 
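/* The memory is private (encrypted) again and can be returned to the page allocator. */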
__free_pages(virt_to_page(buf), get_order(sz)); 2732 } 2733 2734 static void *alloc_shared_pages(size_t sz) 2735 { 2736 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; 2737 struct page *page; 2738 int ret; 2739 2740 page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz)); 2741 if (!page) 2742 return NULL; 2743 2744 ret = set_memory_decrypted((unsigned long)page_address(page), npages); 2745 if (ret) { 2746 pr_err("failed to mark page shared, ret=%d\n", ret); 2747 __free_pages(page, get_order(sz)); 2748 return NULL; 2749 } 2750 2751 return page_address(page); 2752 } 2753 2754 static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno) 2755 { 2756 u8 *key = NULL; 2757 2758 switch (id) { 2759 case 0: 2760 *seqno = &secrets->os_area.msg_seqno_0; 2761 key = secrets->vmpck0; 2762 break; 2763 case 1: 2764 *seqno = &secrets->os_area.msg_seqno_1; 2765 key = secrets->vmpck1; 2766 break; 2767 case 2: 2768 *seqno = &secrets->os_area.msg_seqno_2; 2769 key = secrets->vmpck2; 2770 break; 2771 case 3: 2772 *seqno = &secrets->os_area.msg_seqno_3; 2773 key = secrets->vmpck3; 2774 break; 2775 default: 2776 break; 2777 } 2778 2779 return key; 2780 } 2781 2782 static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen) 2783 { 2784 struct aesgcm_ctx *ctx; 2785 2786 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 2787 if (!ctx) 2788 return NULL; 2789 2790 if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) { 2791 pr_err("Crypto context initialization failed\n"); 2792 kfree(ctx); 2793 return NULL; 2794 } 2795 2796 return ctx; 2797 } 2798 2799 int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) 2800 { 2801 /* Adjust the default VMPCK key based on the executing VMPL level */ 2802 if (vmpck_id == -1) 2803 vmpck_id = snp_vmpl; 2804 2805 mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno); 2806 if (!mdesc->vmpck) { 2807 pr_err("Invalid VMPCK%d communication key\n", vmpck_id); 2808 return -EINVAL; 2809 } 2810 2811 /* Verify that VMPCK is not zero. */ 2812 if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) { 2813 pr_err("Empty VMPCK%d communication key\n", vmpck_id); 2814 return -EINVAL; 2815 } 2816 2817 mdesc->vmpck_id = vmpck_id; 2818 2819 mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN); 2820 if (!mdesc->ctx) 2821 return -ENOMEM; 2822 2823 return 0; 2824 } 2825 EXPORT_SYMBOL_GPL(snp_msg_init); 2826 2827 struct snp_msg_desc *snp_msg_alloc(void) 2828 { 2829 struct snp_msg_desc *mdesc; 2830 void __iomem *mem; 2831 2832 BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE); 2833 2834 mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL); 2835 if (!mdesc) 2836 return ERR_PTR(-ENOMEM); 2837 2838 mem = ioremap_encrypted(secrets_pa, PAGE_SIZE); 2839 if (!mem) 2840 goto e_free_mdesc; 2841 2842 mdesc->secrets = (__force struct snp_secrets_page *)mem; 2843 2844 /* Allocate the shared page used for the request and response message. 
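These pages are mapped shared (decrypted) so that the hypervisor can read the request and write the response.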
*/ 2845 mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg)); 2846 if (!mdesc->request) 2847 goto e_unmap; 2848 2849 mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg)); 2850 if (!mdesc->response) 2851 goto e_free_request; 2852 2853 return mdesc; 2854 2855 e_free_request: 2856 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); 2857 e_unmap: 2858 iounmap(mem); 2859 e_free_mdesc: 2860 kfree(mdesc); 2861 2862 return ERR_PTR(-ENOMEM); 2863 } 2864 EXPORT_SYMBOL_GPL(snp_msg_alloc); 2865 2866 void snp_msg_free(struct snp_msg_desc *mdesc) 2867 { 2868 if (!mdesc) 2869 return; 2870 2871 kfree(mdesc->ctx); 2872 free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); 2873 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); 2874 iounmap((__force void __iomem *)mdesc->secrets); 2875 2876 memset(mdesc, 0, sizeof(*mdesc)); 2877 kfree(mdesc); 2878 } 2879 EXPORT_SYMBOL_GPL(snp_msg_free); 2880 2881 /* Mutex to serialize the shared buffer access and command handling. */ 2882 static DEFINE_MUTEX(snp_cmd_mutex); 2883 2884 /* 2885 * If an error is received from the host or AMD Secure Processor (ASP) there 2886 * are two options. Either retry the exact same encrypted request or discontinue 2887 * using the VMPCK. 2888 * 2889 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to 2890 * encrypt the requests. The IV for this scheme is the sequence number. GCM 2891 * cannot tolerate IV reuse. 2892 * 2893 * The ASP FW v1.51 only increments the sequence numbers on a successful 2894 * guest<->ASP back and forth and only accepts messages at its exact sequence 2895 * number. 2896 * 2897 * So if the sequence number were to be reused the encryption scheme is 2898 * vulnerable. If the sequence number were incremented for a fresh IV the ASP 2899 * will reject the request. 2900 */ 2901 static void snp_disable_vmpck(struct snp_msg_desc *mdesc) 2902 { 2903 pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n", 2904 mdesc->vmpck_id); 2905 memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN); 2906 mdesc->vmpck = NULL; 2907 } 2908 2909 static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc) 2910 { 2911 u64 count; 2912 2913 lockdep_assert_held(&snp_cmd_mutex); 2914 2915 /* Read the current message sequence counter from secrets pages */ 2916 count = *mdesc->os_area_msg_seqno; 2917 2918 return count + 1; 2919 } 2920 2921 /* Return a non-zero on success */ 2922 static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc) 2923 { 2924 u64 count = __snp_get_msg_seqno(mdesc); 2925 2926 /* 2927 * The message sequence counter for the SNP guest request is a 64-bit 2928 * value but the version 2 of GHCB specification defines a 32-bit storage 2929 * for it. If the counter exceeds the 32-bit value then return zero. 2930 * The caller should check the return value, but if the caller happens to 2931 * not check the value and use it, then the firmware treats zero as an 2932 * invalid number and will fail the message request. 2933 */ 2934 if (count >= UINT_MAX) { 2935 pr_err("request message sequence counter overflow\n"); 2936 return 0; 2937 } 2938 2939 return count; 2940 } 2941 2942 static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc) 2943 { 2944 /* 2945 * The counter is also incremented by the PSP, so increment it by 2 2946 * and save in secrets page. 
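Requests and responses each consume one sequence number, so the counter has to advance by two per successful exchange to stay in sync with the PSP.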
2947 */ 2948 *mdesc->os_area_msg_seqno += 2; 2949 } 2950 2951 static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req) 2952 { 2953 struct snp_guest_msg *resp_msg = &mdesc->secret_response; 2954 struct snp_guest_msg *req_msg = &mdesc->secret_request; 2955 struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr; 2956 struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr; 2957 struct aesgcm_ctx *ctx = mdesc->ctx; 2958 u8 iv[GCM_AES_IV_SIZE] = {}; 2959 2960 pr_debug("response [seqno %lld type %d version %d sz %d]\n", 2961 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version, 2962 resp_msg_hdr->msg_sz); 2963 2964 /* Copy response from shared memory to encrypted memory. */ 2965 memcpy(resp_msg, mdesc->response, sizeof(*resp_msg)); 2966 2967 /* Verify that the sequence counter is incremented by 1 */ 2968 if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1))) 2969 return -EBADMSG; 2970 2971 /* Verify response message type and version number. */ 2972 if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) || 2973 resp_msg_hdr->msg_version != req_msg_hdr->msg_version) 2974 return -EBADMSG; 2975 2976 /* 2977 * If the message size is greater than our buffer length then return 2978 * an error. 2979 */ 2980 if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz)) 2981 return -EBADMSG; 2982 2983 /* Decrypt the payload */ 2984 memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno))); 2985 if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz, 2986 &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag)) 2987 return -EBADMSG; 2988 2989 return 0; 2990 } 2991 2992 static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req) 2993 { 2994 struct snp_guest_msg *msg = &mdesc->secret_request; 2995 struct snp_guest_msg_hdr *hdr = &msg->hdr; 2996 struct aesgcm_ctx *ctx = mdesc->ctx; 2997 u8 iv[GCM_AES_IV_SIZE] = {}; 2998 2999 memset(msg, 0, sizeof(*msg)); 3000 3001 hdr->algo = SNP_AEAD_AES_256_GCM; 3002 hdr->hdr_version = MSG_HDR_VER; 3003 hdr->hdr_sz = sizeof(*hdr); 3004 hdr->msg_type = req->msg_type; 3005 hdr->msg_version = req->msg_version; 3006 hdr->msg_seqno = seqno; 3007 hdr->msg_vmpck = req->vmpck_id; 3008 hdr->msg_sz = req->req_sz; 3009 3010 /* Verify the sequence number is non-zero */ 3011 if (!hdr->msg_seqno) 3012 return -ENOSR; 3013 3014 pr_debug("request [seqno %lld type %d version %d sz %d]\n", 3015 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz); 3016 3017 if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload))) 3018 return -EBADMSG; 3019 3020 memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno))); 3021 aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo, 3022 AAD_LEN, iv, hdr->authtag); 3023 3024 return 0; 3025 } 3026 3027 static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, 3028 struct snp_guest_request_ioctl *rio) 3029 { 3030 unsigned long req_start = jiffies; 3031 unsigned int override_npages = 0; 3032 u64 override_err = 0; 3033 int rc; 3034 3035 retry_request: 3036 /* 3037 * Call firmware to process the request. In this function the encrypted 3038 * message enters shared memory with the host. So after this call the 3039 * sequence number must be incremented or the VMPCK must be deleted to 3040 * prevent reuse of the IV. 
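Reusing an IV under the same key would break the confidentiality and authenticity guarantees of AES-GCM.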
3041 */ 3042 rc = snp_issue_guest_request(req, &req->input, rio); 3043 switch (rc) { 3044 case -ENOSPC: 3045 /* 3046 * If the extended guest request fails due to having too 3047 * small of a certificate data buffer, retry the same 3048 * guest request without the extended data request in 3049 * order to increment the sequence number and thus avoid 3050 * IV reuse. 3051 */ 3052 override_npages = req->input.data_npages; 3053 req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; 3054 3055 /* 3056 * Override the error to inform callers the given extended 3057 * request buffer size was too small and give the caller the 3058 * required buffer size. 3059 */ 3060 override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN); 3061 3062 /* 3063 * If this call to the firmware succeeds, the sequence number can 3064 * be incremented allowing for continued use of the VMPCK. If 3065 * there is an error reflected in the return value, this value 3066 * is checked further down and the result will be the deletion 3067 * of the VMPCK and the error code being propagated back to the 3068 * user as an ioctl() return code. 3069 */ 3070 goto retry_request; 3071 3072 /* 3073 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been 3074 * throttled. Retry in the driver to avoid returning and reusing the 3075 * message sequence number on a different message. 3076 */ 3077 case -EAGAIN: 3078 if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) { 3079 rc = -ETIMEDOUT; 3080 break; 3081 } 3082 schedule_timeout_killable(SNP_REQ_RETRY_DELAY); 3083 goto retry_request; 3084 } 3085 3086 /* 3087 * Increment the message sequence number. There is no harm in doing 3088 * this now because decryption uses the value stored in the response 3089 * structure and any failure will wipe the VMPCK, preventing further 3090 * use anyway. 3091 */ 3092 snp_inc_msg_seqno(mdesc); 3093 3094 if (override_err) { 3095 rio->exitinfo2 = override_err; 3096 3097 /* 3098 * If an extended guest request was issued and the supplied certificate 3099 * buffer was not large enough, a standard guest request was issued to 3100 * prevent IV reuse. If the standard request was successful, return -EIO 3101 * back to the caller as would have originally been returned. 3102 */ 3103 if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) 3104 rc = -EIO; 3105 } 3106 3107 if (override_npages) 3108 req->input.data_npages = override_npages; 3109 3110 return rc; 3111 } 3112 3113 int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, 3114 struct snp_guest_request_ioctl *rio) 3115 { 3116 u64 seqno; 3117 int rc; 3118 3119 guard(mutex)(&snp_cmd_mutex); 3120 3121 /* Check if the VMPCK is not empty */ 3122 if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) { 3123 pr_err_ratelimited("VMPCK is disabled\n"); 3124 return -ENOTTY; 3125 } 3126 3127 /* Get the message sequence number and verify that it is non-zero */ 3128 seqno = snp_get_msg_seqno(mdesc); 3129 if (!seqno) 3130 return -EIO; 3131 3132 /* Clear shared memory's response for the host to populate. */ 3133 memset(mdesc->response, 0, sizeof(struct snp_guest_msg)); 3134 3135 /* Encrypt the userspace-provided payload in mdesc->secret_request. */ 3136 rc = enc_payload(mdesc, seqno, req); 3137 if (rc) 3138 return rc; 3139 3140 /* 3141 * Write the fully encrypted request to the shared unencrypted 3142 * request page.
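Only the message header and the ciphertext are exposed to the host; the plaintext never leaves the encrypted secret_request copy.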
3143 */ 3144 memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request)); 3145 3146 /* Initialize the input address for guest request */ 3147 req->input.req_gpa = __pa(mdesc->request); 3148 req->input.resp_gpa = __pa(mdesc->response); 3149 req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0; 3150 3151 rc = __handle_guest_request(mdesc, req, rio); 3152 if (rc) { 3153 if (rc == -EIO && 3154 rio->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) 3155 return rc; 3156 3157 pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n", 3158 rc, rio->exitinfo2); 3159 3160 snp_disable_vmpck(mdesc); 3161 return rc; 3162 } 3163 3164 rc = verify_and_dec_payload(mdesc, req); 3165 if (rc) { 3166 pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc); 3167 snp_disable_vmpck(mdesc); 3168 return rc; 3169 } 3170 3171 return 0; 3172 } 3173 EXPORT_SYMBOL_GPL(snp_send_guest_request); 3174 3175 static int __init snp_get_tsc_info(void) 3176 { 3177 struct snp_guest_request_ioctl *rio; 3178 struct snp_tsc_info_resp *tsc_resp; 3179 struct snp_tsc_info_req *tsc_req; 3180 struct snp_msg_desc *mdesc; 3181 struct snp_guest_req *req; 3182 int rc = -ENOMEM; 3183 3184 tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL); 3185 if (!tsc_req) 3186 return rc; 3187 3188 /* 3189 * The intermediate response buffer is used while decrypting the 3190 * response payload. Make sure that it has enough space to cover 3191 * the authtag. 3192 */ 3193 tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL); 3194 if (!tsc_resp) 3195 goto e_free_tsc_req; 3196 3197 req = kzalloc(sizeof(*req), GFP_KERNEL); 3198 if (!req) 3199 goto e_free_tsc_resp; 3200 3201 rio = kzalloc(sizeof(*rio), GFP_KERNEL); 3202 if (!rio) 3203 goto e_free_req; 3204 3205 mdesc = snp_msg_alloc(); 3206 if (IS_ERR_OR_NULL(mdesc)) 3207 goto e_free_rio; 3208 3209 rc = snp_msg_init(mdesc, snp_vmpl); 3210 if (rc) 3211 goto e_free_mdesc; 3212 3213 req->msg_version = MSG_HDR_VER; 3214 req->msg_type = SNP_MSG_TSC_INFO_REQ; 3215 req->vmpck_id = snp_vmpl; 3216 req->req_buf = tsc_req; 3217 req->req_sz = sizeof(*tsc_req); 3218 req->resp_buf = (void *)tsc_resp; 3219 req->resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN; 3220 req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; 3221 3222 rc = snp_send_guest_request(mdesc, req, rio); 3223 if (rc) 3224 goto e_request; 3225 3226 pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n", 3227 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset, 3228 tsc_resp->tsc_factor); 3229 3230 if (!tsc_resp->status) { 3231 snp_tsc_scale = tsc_resp->tsc_scale; 3232 snp_tsc_offset = tsc_resp->tsc_offset; 3233 } else { 3234 pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status); 3235 rc = -EIO; 3236 } 3237 3238 e_request: 3239 /* The response buffer contains sensitive data, explicitly clear it. 
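memzero_explicit() is used so the compiler cannot optimize the clearing away.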
*/ 3240 memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN); 3241 e_free_mdesc: 3242 snp_msg_free(mdesc); 3243 e_free_rio: 3244 kfree(rio); 3245 e_free_req: 3246 kfree(req); 3247 e_free_tsc_resp: 3248 kfree(tsc_resp); 3249 e_free_tsc_req: 3250 kfree(tsc_req); 3251 3252 return rc; 3253 } 3254 3255 void __init snp_secure_tsc_prepare(void) 3256 { 3257 if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) 3258 return; 3259 3260 if (snp_get_tsc_info()) { 3261 pr_alert("Unable to retrieve Secure TSC info from ASP\n"); 3262 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); 3263 } 3264 3265 pr_debug("SecureTSC enabled"); 3266 } 3267 3268 static unsigned long securetsc_get_tsc_khz(void) 3269 { 3270 return snp_tsc_freq_khz; 3271 } 3272 3273 void __init snp_secure_tsc_init(void) 3274 { 3275 unsigned long long tsc_freq_mhz; 3276 3277 if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) 3278 return; 3279 3280 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); 3281 rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); 3282 snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); 3283 3284 x86_platform.calibrate_cpu = securetsc_get_tsc_khz; 3285 x86_platform.calibrate_tsc = securetsc_get_tsc_khz; 3286 } 3287