// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2019 SUSE
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 */

#define pr_fmt(fmt)	"SEV: " fmt

#define DISABLE_BRANCH_PROFILING

#include <linux/sched/debug.h>	/* For show_regs() */
#include <linux/percpu-defs.h>
#include <linux/cc_platform.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
#include <linux/memblock.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/cpumask.h>
#include <linux/efi.h>
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/psp-sev.h>
#include <linux/dmi.h>
#include <uapi/linux/sev-guest.h>
#include <crypto/gcm.h>

#include <asm/init.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
#include <asm/insn-eval.h>
#include <asm/fpu/xcr.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/setup.h>
#include <asm/traps.h>
#include <asm/svm.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/cpuid.h>
#include <asm/cmdline.h>

#define DR7_RESET_VALUE		0x400

/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
#define AP_INIT_CS_LIMIT		0xffff
#define AP_INIT_DS_LIMIT		0xffff
#define AP_INIT_LDTR_LIMIT		0xffff
#define AP_INIT_GDTR_LIMIT		0xffff
#define AP_INIT_IDTR_LIMIT		0xffff
#define AP_INIT_TR_LIMIT		0xffff
#define AP_INIT_RFLAGS_DEFAULT		0x2
#define AP_INIT_DR6_DEFAULT		0xffff0ff0
#define AP_INIT_GPAT_DEFAULT		0x0007040600070406ULL
#define AP_INIT_XCR0_DEFAULT		0x1
#define AP_INIT_X87_FTW_DEFAULT		0x5555
#define AP_INIT_X87_FCW_DEFAULT		0x0040
#define AP_INIT_CR0_DEFAULT		0x60000010
#define AP_INIT_MXCSR_DEFAULT		0x1f80

static const char * const sev_status_feat_names[] = {
	[MSR_AMD64_SEV_ENABLED_BIT]		= "SEV",
	[MSR_AMD64_SEV_ES_ENABLED_BIT]		= "SEV-ES",
	[MSR_AMD64_SEV_SNP_ENABLED_BIT]		= "SEV-SNP",
	[MSR_AMD64_SNP_VTOM_BIT]		= "vTom",
	[MSR_AMD64_SNP_REFLECT_VC_BIT]		= "ReflectVC",
	[MSR_AMD64_SNP_RESTRICTED_INJ_BIT]	= "RI",
	[MSR_AMD64_SNP_ALT_INJ_BIT]		= "AI",
	[MSR_AMD64_SNP_DEBUG_SWAP_BIT]		= "DebugSwap",
	[MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT]	= "NoHostIBS",
	[MSR_AMD64_SNP_BTB_ISOLATION_BIT]	= "BTBIsol",
	[MSR_AMD64_SNP_VMPL_SSS_BIT]		= "VmplSSS",
	[MSR_AMD64_SNP_SECURE_TSC_BIT]		= "SecureTSC",
	[MSR_AMD64_SNP_VMGEXIT_PARAM_BIT]	= "VMGExitParam",
	[MSR_AMD64_SNP_IBS_VIRT_BIT]		= "IBSVirt",
	[MSR_AMD64_SNP_VMSA_REG_PROT_BIT]	= "VMSARegProt",
	[MSR_AMD64_SNP_SMT_PROT_BIT]		= "SMTProt",
};

/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);

/*
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared
 */
static struct ghcb *boot_ghcb __section(".data");

/* Bitmap of SEV features supported by the hypervisor */
static u64 sev_hv_features __ro_after_init;

/* Secrets page physical address from the CC blob */
static u64 secrets_pa __ro_after_init;

/*
 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and
 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated
 * across the APs VMSA fields (TSC_SCALE and TSC_OFFSET).
 */
static u64 snp_tsc_scale __ro_after_init;
static u64 snp_tsc_offset __ro_after_init;
static u64 snp_tsc_freq_khz __ro_after_init;

/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
	struct ghcb ghcb_page;

	/*
	 * Reserve one page per CPU as backup storage for the unencrypted GHCB.
	 * It is needed when an NMI happens while the #VC handler uses the real
	 * GHCB, and the NMI handler itself is causing another #VC exception. In
	 * that case the GHCB content of the first handler needs to be backed up
	 * and restored.
	 */
	struct ghcb backup_ghcb;

	/*
	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
	 * There is no need for it to be atomic, because nothing is written to
	 * the GHCB between the read and the write of ghcb_active. So it is safe
	 * to use it when a nested #VC exception happens before the write.
	 *
	 * This is necessary for example in the #VC->NMI->#VC case when the NMI
	 * happens while the first #VC handler uses the GHCB. When the NMI code
	 * raises a second #VC handler it might overwrite the contents of the
	 * GHCB written by the first handler. To avoid this the content of the
	 * GHCB is saved and restored when the GHCB is detected to be in use
	 * already.
	 */
	bool ghcb_active;
	bool backup_ghcb_active;

	/*
	 * Cached DR7 value - write it on DR7 writes and return it on reads.
	 * That value will never make it to the real hardware DR7 as debugging
	 * is currently unsupported in SEV-ES guests.
	 */
	unsigned long dr7;
};

struct ghcb_state {
	struct ghcb *ghcb;
};

/* For early boot SVSM communication */
static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);

static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
static DEFINE_PER_CPU(u64, svsm_caa_pa);

static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
	unsigned long sp = regs->sp;

	/* User-mode RSP is not trusted */
	if (user_mode(regs))
		return false;

	/* SYSCALL gap still has user-mode RSP */
	if (ip_within_syscall_gap(regs))
		return false;

	return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
}

/*
 * This function handles the case when an NMI is raised in the #VC
 * exception handler entry code, before the #VC handler has switched off
 * its IST stack. In this case, the IST entry for #VC must be adjusted,
 * so that any nested #VC exception will not overwrite the stack
 * contents of the interrupted #VC handler.
 *
 * The IST entry is adjusted unconditionally so that it can also be
 * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
 * nested sev_es_ist_exit() call may adjust back the IST entry too
 * early.
 *
 * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
 * on the NMI IST stack, as they are only called from NMI handling code
 * right now.
 */
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
	unsigned long old_ist, new_ist;

	/* Read old IST entry */
	new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	/*
	 * If NMI happened while on the #VC IST stack, set the new IST
	 * value below regs->sp, so that the interrupted stack frame is
	 * not overwritten by subsequent #VC exceptions.
	 */
	if (on_vc_stack(regs))
		new_ist = regs->sp;

	/*
	 * Reserve additional 8 bytes and store old IST value so this
	 * adjustment can be unrolled in __sev_es_ist_exit().
	 */
	new_ist -= sizeof(old_ist);
	*(unsigned long *)new_ist = old_ist;

	/* Set new IST entry */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
}

void noinstr __sev_es_ist_exit(void)
{
	unsigned long ist;

	/* Read IST entry */
	ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
		return;

	/* Read back old IST entry and write it to the TSS */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
}

/*
 * Nothing shall interrupt this code path while holding the per-CPU
 * GHCB. The backup GHCB is only for NMIs interrupting this path.
 *
 * Callers must disable local interrupts around it.
 */
static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (unlikely(data->ghcb_active)) {
		/* GHCB is already in use - save its contents */

		if (unlikely(data->backup_ghcb_active)) {
			/*
			 * Backup-GHCB is also already in use. There is no way
			 * to continue here so just kill the machine. To make
			 * panic() work, mark GHCBs inactive so that messages
			 * can be printed out.
			 */
			data->ghcb_active = false;
			data->backup_ghcb_active = false;

			instrumentation_begin();
			panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
			instrumentation_end();
		}

		/* Mark backup_ghcb active before writing to it */
		data->backup_ghcb_active = true;

		state->ghcb = &data->backup_ghcb;

		/* Backup GHCB content */
		*state->ghcb = *ghcb;
	} else {
		state->ghcb = NULL;
		data->ghcb_active = true;
	}

	return ghcb;
}
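/*
 * Illustrative usage of the per-CPU GHCB (see e.g. __sev_es_nmi_complete()
 * and get_jump_table_addr() below):
 *
 *	struct ghcb_state state;
 *	struct ghcb *ghcb;
 *
 *	ghcb = __sev_get_ghcb(&state);
 *	... invalidate and fill the GHCB, write its PA to the GHCB MSR ...
 *	VMGEXIT();
 *	__sev_put_ghcb(&state);
 *
 * Callers outside of NMI context keep local interrupts disabled for the whole
 * sequence, and every __sev_get_ghcb() is paired with a __sev_put_ghcb() so
 * that a backup GHCB taken for a nested user is restored.
 */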
static inline u64 sev_es_rd_ghcb_msr(void)
{
	return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}

static __always_inline void sev_es_wr_ghcb_msr(u64 val)
{
	u32 low, high;

	low = (u32)(val);
	high = (u32)(val >> 32);

	native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
}

static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
				unsigned char *buffer)
{
	return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}

static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	int insn_bytes;

	insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
	if (insn_bytes == 0) {
		/* Nothing could be copied */
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
		ctxt->fi.cr2 = ctxt->regs->ip;
		return ES_EXCEPTION;
	} else if (insn_bytes == -EINVAL) {
		/* Effective RIP could not be calculated */
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		ctxt->fi.cr2 = 0;
		return ES_EXCEPTION;
	}

	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
		return ES_DECODE_FAILED;

	if (ctxt->insn.immediate.got)
		return ES_OK;
	else
		return ES_DECODE_FAILED;
}

static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
{
	char buffer[MAX_INSN_SIZE];
	int res, ret;

	res = vc_fetch_insn_kernel(ctxt, buffer);
	if (res) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.error_code = X86_PF_INSTR;
		ctxt->fi.cr2 = ctxt->regs->ip;
		return ES_EXCEPTION;
	}

	ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
	if (ret < 0)
		return ES_DECODE_FAILED;
	else
		return ES_OK;
}

static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
{
	if (user_mode(ctxt->regs))
		return __vc_decode_user_insn(ctxt);
	else
		return __vc_decode_kern_insn(ctxt);
}

static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
				   char *dst, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;

	/*
	 * This function uses __put_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __put_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __put_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_to_user() here because
	 * vc_write_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *target = (u8 __user *)dst;

		memcpy(&d1, buf, 1);
		if (__put_user(d1, target))
			goto fault;
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *target = (u16 __user *)dst;

		memcpy(&d2, buf, 2);
		if (__put_user(d2, target))
			goto fault;
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *target = (u32 __user *)dst;

		memcpy(&d4, buf, 4);
		if (__put_user(d4, target))
			goto fault;
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *target = (u64 __user *)dst;

		memcpy(&d8, buf, 8);
		if (__put_user(d8, target))
			goto fault;
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)dst;

	return ES_EXCEPTION;
}

static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
				  char *src, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT;

	/*
	 * This function uses __get_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __get_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __get_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_from_user() here because
	 * vc_read_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *s = (u8 __user *)src;

		if (__get_user(d1, s))
			goto fault;
		memcpy(buf, &d1, 1);
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *s = (u16 __user *)src;

		if (__get_user(d2, s))
			goto fault;
		memcpy(buf, &d2, 2);
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *s = (u32 __user *)src;

		if (__get_user(d4, s))
			goto fault;
		memcpy(buf, &d4, 4);
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *s = (u64 __user *)src;

		if (__get_user(d8, s))
			goto fault;
		memcpy(buf, &d8, 8);
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)src;

	return ES_EXCEPTION;
}

static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
					   unsigned long vaddr, phys_addr_t *paddr)
{
	unsigned long va = (unsigned long)vaddr;
	unsigned int level;
	phys_addr_t pa;
	pgd_t *pgd;
	pte_t *pte;

	pgd = __va(read_cr3_pa());
	pgd = &pgd[pgd_index(va)];
	pte = lookup_address_in_pgd(pgd, va, &level);
	if (!pte) {
		ctxt->fi.vector = X86_TRAP_PF;
		ctxt->fi.cr2 = vaddr;
		ctxt->fi.error_code = 0;

		if (user_mode(ctxt->regs))
			ctxt->fi.error_code |= X86_PF_USER;

		return ES_EXCEPTION;
	}

	if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
		/* Emulated MMIO to/from encrypted memory not supported */
		return ES_UNSUPPORTED;

	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
	pa |= va & ~page_level_mask(level);

	*paddr = pa;

	return ES_OK;
}

static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
{
	BUG_ON(size > 4);

	if (user_mode(ctxt->regs)) {
		struct thread_struct *t = &current->thread;
		struct io_bitmap *iobm = t->io_bitmap;
		size_t idx;

		if (!iobm)
			goto fault;

		for (idx = port; idx < port + size; ++idx) {
			if (test_bit(idx, iobm->bitmap))
				goto fault;
		}
	}

	return ES_OK;

fault:
	ctxt->fi.vector = X86_TRAP_GP;
	ctxt->fi.error_code = 0;

	return ES_EXCEPTION;
}

static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
{
	long error_code = ctxt->fi.error_code;
	int trapnr = ctxt->fi.vector;

	ctxt->regs->orig_ax = ctxt->fi.error_code;

	switch (trapnr) {
	case X86_TRAP_GP:
		exc_general_protection(ctxt->regs, error_code);
		break;
	case X86_TRAP_UD:
		exc_invalid_op(ctxt->regs);
		break;
	case X86_TRAP_PF:
		write_cr2(ctxt->fi.cr2);
		exc_page_fault(ctxt->regs, error_code);
		break;
	case X86_TRAP_AC:
		exc_alignment_check(ctxt->regs, error_code);
		break;
	default:
		pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
		BUG();
	}
}

/* Include code shared with pre-decompression boot stage */
#include "shared.c"

static inline struct svsm_ca *svsm_get_caa(void)
{
	/*
	 * Use rIP-relative references when called early in the boot. If
	 * ->use_cas is set, then it is late in the boot and no need
	 * to worry about rIP-relative references.
	 */
	if (RIP_REL_REF(sev_cfg).use_cas)
		return this_cpu_read(svsm_caa);
	else
		return RIP_REL_REF(boot_svsm_caa);
}

static u64 svsm_get_caa_pa(void)
{
	/*
	 * Use rIP-relative references when called early in the boot. If
	 * ->use_cas is set, then it is late in the boot and no need
	 * to worry about rIP-relative references.
	 */
	if (RIP_REL_REF(sev_cfg).use_cas)
		return this_cpu_read(svsm_caa_pa);
	else
		return RIP_REL_REF(boot_svsm_caa_pa);
}

static noinstr void __sev_put_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (state->ghcb) {
		/* Restore GHCB from Backup */
		*ghcb = *state->ghcb;
		data->backup_ghcb_active = false;
		state->ghcb = NULL;
	} else {
		/*
		 * Invalidate the GHCB so a VMGEXIT instruction issued
		 * from userspace won't appear to be valid.
		 */
		vc_ghcb_invalidate(ghcb);
		data->ghcb_active = false;
	}
}

static int svsm_perform_call_protocol(struct svsm_call *call)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret;

	/*
	 * This can be called very early in the boot, use native functions in
	 * order to avoid paravirt issues.
	 */
	flags = native_local_irq_save();

	/*
	 * Use rip-relative references when called early in the boot. If
	 * ghcbs_initialized is set, then it is late in the boot and no need
	 * to worry about rip-relative references in called functions.
	 */
	if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
		ghcb = __sev_get_ghcb(&state);
	else if (RIP_REL_REF(boot_ghcb))
		ghcb = RIP_REL_REF(boot_ghcb);
	else
		ghcb = NULL;

	do {
		ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
			   : svsm_perform_msr_protocol(call);
	} while (ret == -EAGAIN);

	if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
		__sev_put_ghcb(&state);

	native_local_irq_restore(flags);

	return ret;
}
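/*
 * Illustrative SVSM request sequence (mirrors the SVSM_CORE_REMAP_CA call in
 * sev_es_init_vc_handling() below): fill a struct svsm_call and let
 * svsm_perform_call_protocol() pick the GHCB or MSR based transport:
 *
 *	struct svsm_call call = {};
 *
 *	call.caa = svsm_get_caa();
 *	call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
 *	call.rcx = this_cpu_read(svsm_caa_pa);
 *	ret = svsm_perform_call_protocol(&call);
 */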
void noinstr __sev_es_nmi_complete(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
	VMGEXIT();

	__sev_put_ghcb(&state);
}

static u64 __init get_snp_jump_table_addr(void)
{
	struct snp_secrets_page *secrets;
	void __iomem *mem;
	u64 addr;

	mem = ioremap_encrypted(secrets_pa, PAGE_SIZE);
	if (!mem) {
		pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
		return 0;
	}

	secrets = (__force struct snp_secrets_page *)mem;

	addr = secrets->os_area.ap_jump_table_pa;
	iounmap(mem);

	return addr;
}

static u64 __init get_jump_table_addr(void)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	u64 ret = 0;

	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return get_snp_jump_table_addr();

	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
	    ghcb_sw_exit_info_2_is_valid(ghcb))
		ret = ghcb->save.sw_exit_info_2;

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}

static void __head
early_set_pages_state(unsigned long vaddr, unsigned long paddr,
		      unsigned long npages, enum psc_op op)
{
	unsigned long paddr_end;
	u64 val;

	vaddr = vaddr & PAGE_MASK;

	paddr = paddr & PAGE_MASK;
	paddr_end = paddr + (npages << PAGE_SHIFT);

	while (paddr < paddr_end) {
		/* Page validation must be rescinded before changing to shared */
		if (op == SNP_PAGE_STATE_SHARED)
			pvalidate_4k_page(vaddr, paddr, false);

		/*
		 * Use the MSR protocol because this function can be called before
		 * the GHCB is established.
		 */
		sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
		VMGEXIT();

		val = sev_es_rd_ghcb_msr();

		if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
			goto e_term;

		if (GHCB_MSR_PSC_RESP_VAL(val))
			goto e_term;

		/* Page validation must be performed after changing to private */
		if (op == SNP_PAGE_STATE_PRIVATE)
			pvalidate_4k_page(vaddr, paddr, true);

		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	return;

e_term:
	sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
}

void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
					 unsigned long npages)
{
	/*
	 * This can be invoked in early boot while running identity mapped, so
	 * use an open coded check for SNP instead of using cc_platform_has().
	 * This eliminates worries about jump tables or checking boot_cpu_data
	 * in the cc_platform_has() function.
	 */
	if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
		return;

	/*
	 * Ask the hypervisor to mark the memory pages as private in the RMP
	 * table.
	 */
	early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
}

void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
					unsigned long npages)
{
	/*
	 * This can be invoked in early boot while running identity mapped, so
	 * use an open coded check for SNP instead of using cc_platform_has().
	 * This eliminates worries about jump tables or checking boot_cpu_data
	 * in the cc_platform_has() function.
	 */
	if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
		return;

	/* Ask hypervisor to mark the memory pages shared in the RMP table. */
	early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
}

static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
				       unsigned long vaddr_end, int op)
{
	struct ghcb_state state;
	bool use_large_entry;
	struct psc_hdr *hdr;
	struct psc_entry *e;
	unsigned long flags;
	unsigned long pfn;
	struct ghcb *ghcb;
	int i;

	hdr = &data->hdr;
	e = data->entries;

	memset(data, 0, sizeof(*data));
	i = 0;

	while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
		hdr->end_entry = i;

		if (is_vmalloc_addr((void *)vaddr)) {
			pfn = vmalloc_to_pfn((void *)vaddr);
			use_large_entry = false;
		} else {
			pfn = __pa(vaddr) >> PAGE_SHIFT;
			use_large_entry = true;
		}

		e->gfn = pfn;
		e->operation = op;

		if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
		    (vaddr_end - vaddr) >= PMD_SIZE) {
			e->pagesize = RMP_PG_SIZE_2M;
			vaddr += PMD_SIZE;
		} else {
			e->pagesize = RMP_PG_SIZE_4K;
			vaddr += PAGE_SIZE;
		}

		e++;
		i++;
	}

	/* Page validation must be rescinded before changing to shared */
	if (op == SNP_PAGE_STATE_SHARED)
		pvalidate_pages(data);

	local_irq_save(flags);

	if (sev_cfg.ghcbs_initialized)
		ghcb = __sev_get_ghcb(&state);
	else
		ghcb = boot_ghcb;

	/* Invoke the hypervisor to perform the page state changes */
	if (!ghcb || vmgexit_psc(ghcb, data))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);

	if (sev_cfg.ghcbs_initialized)
		__sev_put_ghcb(&state);

	local_irq_restore(flags);

	/* Page validation must be performed after changing to private */
	if (op == SNP_PAGE_STATE_PRIVATE)
		pvalidate_pages(data);

	return vaddr;
}

static void set_pages_state(unsigned long vaddr, unsigned long npages, int op)
{
	struct snp_psc_desc desc;
	unsigned long vaddr_end;

	/* Use the MSR protocol when a GHCB is not available. */
	if (!boot_ghcb)
		return early_set_pages_state(vaddr, __pa(vaddr), npages, op);

	vaddr = vaddr & PAGE_MASK;
	vaddr_end = vaddr + (npages << PAGE_SHIFT);

	while (vaddr < vaddr_end)
		vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op);
}
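/*
 * Summary of the page state change path (descriptive only): the
 * snp_set_memory_shared()/snp_set_memory_private() helpers below funnel into
 * set_pages_state(), which batches requests into a struct snp_psc_desc and
 * issues them through the GHCB, while early callers without a GHCB fall back
 * to the slower one-page-at-a-time MSR protocol in early_set_pages_state().
 * In all cases PVALIDATE is dropped before a private->shared conversion and
 * applied only after a shared->private conversion has succeeded.
 */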
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED);
}

void snp_set_memory_private(unsigned long vaddr, unsigned long npages)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}

void snp_accept_memory(phys_addr_t start, phys_addr_t end)
{
	unsigned long vaddr, npages;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	vaddr = (unsigned long)__va(start);
	npages = (end - start) >> PAGE_SHIFT;

	set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}

static void set_pte_enc(pte_t *kpte, int level, void *va)
{
	struct pte_enc_desc d = {
		.kpte = kpte,
		.pte_level = level,
		.va = va,
		.encrypt = true
	};

	prepare_pte_enc(&d);
	set_pte_enc_mask(kpte, d.pfn, d.new_pgprot);
}

static void unshare_all_memory(void)
{
	unsigned long addr, end, size, ghcb;
	struct sev_es_runtime_data *data;
	unsigned int npages, level;
	bool skipped_addr;
	pte_t *pte;
	int cpu;

	/* Unshare the direct mapping. */
	addr = PAGE_OFFSET;
	end = PAGE_OFFSET + get_max_mapped();

	while (addr < end) {
		pte = lookup_address(addr, &level);
		size = page_level_size(level);
		npages = size / PAGE_SIZE;
		skipped_addr = false;

		if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) {
			addr += size;
			continue;
		}

		/*
		 * Ensure that all the per-CPU GHCBs are made private at the
		 * end of the unsharing loop so that the switch to the slower
		 * MSR protocol happens last.
		 */
		for_each_possible_cpu(cpu) {
			data = per_cpu(runtime_data, cpu);
			ghcb = (unsigned long)&data->ghcb_page;

			if (addr <= ghcb && ghcb <= addr + size) {
				skipped_addr = true;
				break;
			}
		}

		if (!skipped_addr) {
			set_pte_enc(pte, level, (void *)addr);
			snp_set_memory_private(addr, npages);
		}
		addr += size;
	}

	/* Unshare all bss decrypted memory. */
	addr = (unsigned long)__start_bss_decrypted;
	end = (unsigned long)__start_bss_decrypted_unused;
	npages = (end - addr) >> PAGE_SHIFT;

	for (; addr < end; addr += PAGE_SIZE) {
		pte = lookup_address(addr, &level);
		if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
			continue;

		set_pte_enc(pte, level, (void *)addr);
	}
	addr = (unsigned long)__start_bss_decrypted;
	snp_set_memory_private(addr, npages);

	__flush_tlb_all();
}

/* Stop new private<->shared conversions */
void snp_kexec_begin(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	/*
	 * Crash kernel ends up here with interrupts disabled: can't wait for
	 * conversions to finish.
	 *
	 * If a race happened, just report and proceed.
	 */
	if (!set_memory_enc_stop_conversion())
		pr_warn("Failed to stop shared<->private conversions\n");
}

void snp_kexec_finish(void)
{
	struct sev_es_runtime_data *data;
	unsigned int level, cpu;
	unsigned long size;
	struct ghcb *ghcb;
	pte_t *pte;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	unshare_all_memory();

	/*
	 * Switch to using the MSR protocol to change per-CPU GHCBs to
	 * private. All the per-CPU GHCBs have been switched back to private,
	 * so can't do any more GHCB calls to the hypervisor beyond this point
	 * until the kexec'ed kernel starts running.
	 */
	boot_ghcb = NULL;
	sev_cfg.ghcbs_initialized = false;

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);
		ghcb = &data->ghcb_page;
		pte = lookup_address((unsigned long)ghcb, &level);
		size = page_level_size(level);
		set_pte_enc(pte, level, (void *)ghcb);
		snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
	}
}

static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
{
	int ret;

	if (snp_vmpl) {
		struct svsm_call call = {};
		unsigned long flags;

		local_irq_save(flags);

		call.caa = this_cpu_read(svsm_caa);
		call.rcx = __pa(va);

		if (make_vmsa) {
			/* Protocol 0, Call ID 2 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
			call.rdx = __pa(caa);
			call.r8 = apic_id;
		} else {
			/* Protocol 0, Call ID 3 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
		}

		ret = svsm_perform_call_protocol(&call);

		local_irq_restore(flags);
	} else {
		/*
		 * If the kernel runs at VMPL0, it can change the VMSA
		 * bit for a page using the RMPADJUST instruction.
		 * However, for the instruction to succeed it must
		 * target the permissions of a lesser privileged (higher
		 * numbered) VMPL level, so use VMPL1.
		 */
		u64 attrs = 1;

		if (make_vmsa)
			attrs |= RMPADJUST_VMSA_PAGE_BIT;

		ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
	}

	return ret;
}

#define __ATTR_BASE		(SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
#define INIT_CS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
#define INIT_DS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)

#define INIT_LDTR_ATTRIBS	(SVM_SELECTOR_P_MASK | 2)
#define INIT_TR_ATTRIBS		(SVM_SELECTOR_P_MASK | 3)

static void *snp_alloc_vmsa_page(int cpu)
{
	struct page *p;

	/*
	 * Allocate VMSA page to work around the SNP erratum where the CPU will
	 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB)
	 * collides with the RMP entry of VMSA page. The recommended workaround
	 * is to not use a large page.
	 *
	 * Allocate an 8k page which is also 8k-aligned.
	 */
	p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
	if (!p)
		return NULL;

	split_page(p, 1);

	/* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */
	__free_page(p);

	return page_address(p + 1);
}

static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
{
	int err;

	err = snp_set_vmsa(vmsa, NULL, apic_id, false);
	if (err)
		pr_err("clear VMSA page failed (%u), leaking page\n", err);
	else
		free_page((unsigned long)vmsa);
}

static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
{
	struct sev_es_save_area *cur_vmsa, *vmsa;
	struct ghcb_state state;
	struct svsm_ca *caa;
	unsigned long flags;
	struct ghcb *ghcb;
	u8 sipi_vector;
	int cpu, ret;
	u64 cr4;

	/*
	 * The hypervisor SNP feature support check has happened earlier, just check
	 * the AP_CREATION one here.
	 */
	if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
		return -EOPNOTSUPP;

	/*
	 * Verify the desired start IP against the known trampoline start IP
	 * to catch any future new trampolines that may be introduced that
	 * would require a new protected guest entry point.
	 */
	if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
		      "Unsupported SNP start_ip: %lx\n", start_ip))
		return -EINVAL;

	/* Override start_ip with known protected guest start IP */
	start_ip = real_mode_header->sev_es_trampoline_start;

	/* Find the logical CPU for the APIC ID */
	for_each_present_cpu(cpu) {
		if (arch_match_cpu_phys_id(cpu, apic_id))
			break;
	}
	if (cpu >= nr_cpu_ids)
		return -EINVAL;

	cur_vmsa = per_cpu(sev_vmsa, cpu);

	/*
	 * A new VMSA is created each time because there is no guarantee that
	 * the current VMSA is the kernel's or that the vCPU is not running. If
	 * an attempt was done to use the current VMSA with a running vCPU, a
	 * #VMEXIT of that vCPU would wipe out all of the settings being done
	 * here.
	 */
	vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu);
	if (!vmsa)
		return -ENOMEM;

	/* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */
	caa = per_cpu(svsm_caa, cpu);

	/* CR4 should maintain the MCE value */
	cr4 = native_read_cr4() & X86_CR4_MCE;

	/* Set the CS value based on the start_ip converted to a SIPI vector */
	sipi_vector = (start_ip >> 12);
	vmsa->cs.base = sipi_vector << 12;
	vmsa->cs.limit = AP_INIT_CS_LIMIT;
	vmsa->cs.attrib = INIT_CS_ATTRIBS;
	vmsa->cs.selector = sipi_vector << 8;

	/* Set the RIP value based on start_ip */
	vmsa->rip = start_ip & 0xfff;

	/* Set AP INIT defaults as documented in the APM */
	vmsa->ds.limit = AP_INIT_DS_LIMIT;
	vmsa->ds.attrib = INIT_DS_ATTRIBS;
	vmsa->es = vmsa->ds;
	vmsa->fs = vmsa->ds;
	vmsa->gs = vmsa->ds;
	vmsa->ss = vmsa->ds;

	vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT;
	vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT;
	vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
	vmsa->idtr.limit = AP_INIT_IDTR_LIMIT;
	vmsa->tr.limit = AP_INIT_TR_LIMIT;
	vmsa->tr.attrib = INIT_TR_ATTRIBS;

	vmsa->cr4 = cr4;
	vmsa->cr0 = AP_INIT_CR0_DEFAULT;
	vmsa->dr7 = DR7_RESET_VALUE;
	vmsa->dr6 = AP_INIT_DR6_DEFAULT;
	vmsa->rflags = AP_INIT_RFLAGS_DEFAULT;
	vmsa->g_pat = AP_INIT_GPAT_DEFAULT;
	vmsa->xcr0 = AP_INIT_XCR0_DEFAULT;
	vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT;
	vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
	vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;

	/* SVME must be set. */
	vmsa->efer = EFER_SVME;

	/*
	 * Set the SNP-specific fields for this VMSA:
	 *   VMPL level
	 *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
	 */
	vmsa->vmpl = snp_vmpl;
	vmsa->sev_features = sev_status >> 2;

	/* Populate AP's TSC scale/offset to get accurate TSC values. */
	if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) {
		vmsa->tsc_scale = snp_tsc_scale;
		vmsa->tsc_offset = snp_tsc_offset;
	}

	/* Switch the page over to a VMSA page now that it is initialized */
	ret = snp_set_vmsa(vmsa, caa, apic_id, true);
	if (ret) {
		pr_err("set VMSA page failed (%u)\n", ret);
		free_page((unsigned long)vmsa);

		return -EINVAL;
	}

	/* Issue VMGEXIT AP Creation NAE event */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_rax(ghcb, vmsa->sev_features);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
	ghcb_set_sw_exit_info_1(ghcb,
				((u64)apic_id << 32)	|
				((u64)snp_vmpl << 16)	|
				SVM_VMGEXIT_AP_CREATE);
	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
		pr_err("SNP AP Creation error\n");
		ret = -EINVAL;
	}

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	/* Perform cleanup if there was an error */
	if (ret) {
		snp_cleanup_vmsa(vmsa, apic_id);
		vmsa = NULL;
	}

	/* Free up any previous VMSA page */
	if (cur_vmsa)
		snp_cleanup_vmsa(cur_vmsa, apic_id);

	/* Record the current VMSA page */
	per_cpu(sev_vmsa, cpu) = vmsa;

	return ret;
}

void __init snp_set_wakeup_secondary_cpu(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return;

	/*
	 * Always set this override if SNP is enabled. This makes it the
	 * required method to start APs under SNP. If the hypervisor does
	 * not support AP creation, then no APs will be started.
	 */
	apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit);
}

int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
{
	u16 startup_cs, startup_ip;
	phys_addr_t jump_table_pa;
	u64 jump_table_addr;
	u16 __iomem *jump_table;

	jump_table_addr = get_jump_table_addr();

	/* On UP guests there is no jump table so this is not a failure */
	if (!jump_table_addr)
		return 0;

	/* Check if AP Jump Table is page-aligned */
	if (jump_table_addr & ~PAGE_MASK)
		return -EINVAL;

	jump_table_pa = jump_table_addr & PAGE_MASK;

	startup_cs = (u16)(rmh->trampoline_start >> 4);
	startup_ip = (u16)(rmh->sev_es_trampoline_start -
			   rmh->trampoline_start);

	jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
	if (!jump_table)
		return -EIO;

	writew(startup_ip, &jump_table[0]);
	writew(startup_cs, &jump_table[1]);

	iounmap(jump_table);

	return 0;
}

/*
 * This is needed by the OVMF UEFI firmware which will use whatever it finds in
 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
 * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
 */
int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
{
	struct sev_es_runtime_data *data;
	unsigned long address, pflags;
	int cpu;
	u64 pfn;

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return 0;

	pflags = _PAGE_NX | _PAGE_RW;

	for_each_possible_cpu(cpu) {
		data = per_cpu(runtime_data, cpu);

		address = __pa(&data->ghcb_page);
		pfn = address >> PAGE_SHIFT;

		if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
			return 1;
	}

	return 0;
}

/* Writes to the SVSM CAA MSR are ignored */
static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write)
{
	if (write)
		return ES_OK;

	regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
	regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));

	return ES_OK;
}

/*
 * TSC related accesses should not exit to the hypervisor when a guest is
 * executing with Secure TSC enabled, so special handling is required for
 * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ.
 */
static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write)
{
	u64 tsc;

	/*
	 * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled.
	 * Terminate the SNP guest when the interception is enabled.
	 */
	if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ)
		return ES_VMM_ERROR;

	/*
	 * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC
	 *         to return undefined values, so ignore all writes.
	 *
	 * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use
	 *        the value returned by rdtsc_ordered().
	 */
	if (write) {
		WARN_ONCE(1, "TSC MSR writes are verboten!\n");
		return ES_OK;
	}

	tsc = rdtsc_ordered();
	regs->ax = lower_32_bits(tsc);
	regs->dx = upper_32_bits(tsc);

	return ES_OK;
}

static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct pt_regs *regs = ctxt->regs;
	enum es_result ret;
	bool write;

	/* Is it a WRMSR? */
	write = ctxt->insn.opcode.bytes[1] == 0x30;

	switch (regs->cx) {
	case MSR_SVSM_CAA:
		return __vc_handle_msr_caa(regs, write);
	case MSR_IA32_TSC:
	case MSR_AMD64_GUEST_TSC_FREQ:
		if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
			return __vc_handle_secure_tsc_msrs(regs, write);
		else
			break;
	default:
		break;
	}

	ghcb_set_rcx(ghcb, regs->cx);
	if (write) {
		ghcb_set_rax(ghcb, regs->ax);
		ghcb_set_rdx(ghcb, regs->dx);
	}

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0);

	if ((ret == ES_OK) && !write) {
		regs->ax = ghcb->save.rax;
		regs->dx = ghcb->save.rdx;
	}

	return ret;
}

static void snp_register_per_cpu_ghcb(void)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	snp_register_ghcb_early(__pa(ghcb));
}

void setup_ghcb(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	/*
	 * Check whether the runtime #VC exception handler is active. It uses
	 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
	 *
	 * If SNP is active, register the per-CPU GHCB page so that the runtime
	 * exception handler can use it.
	 */
	if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
			snp_register_per_cpu_ghcb();

		sev_cfg.ghcbs_initialized = true;

		return;
	}

	/*
	 * Make sure the hypervisor talks a supported protocol.
	 * This gets called only in the BSP boot phase.
	 */
	if (!sev_es_negotiate_protocol())
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);

	/*
	 * Clear the boot_ghcb. The first exception comes in before the bss
	 * section is cleared.
	 */
	memset(&boot_ghcb_page, 0, PAGE_SIZE);

	/* Alright - Make the boot-ghcb public */
	boot_ghcb = &boot_ghcb_page;

	/* SNP guest requires that GHCB GPA must be registered. */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		snp_register_ghcb_early(__pa(&boot_ghcb_page));
}
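/*
 * GHCB bring-up summary (descriptive only): the first setup_ghcb() call on the
 * BSP negotiates the GHCB protocol and publishes boot_ghcb_page; once
 * sev_es_init_vc_handling() has installed the runtime #VC handler and the
 * per-CPU GHCBs, subsequent setup_ghcb() calls only (re)register the per-CPU
 * GHCB GPA with the hypervisor when SNP is active.
 */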
#ifdef CONFIG_HOTPLUG_CPU
static void sev_es_ap_hlt_loop(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	while (true) {
		vc_ghcb_invalidate(ghcb);
		ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
		ghcb_set_sw_exit_info_1(ghcb, 0);
		ghcb_set_sw_exit_info_2(ghcb, 0);

		sev_es_wr_ghcb_msr(__pa(ghcb));
		VMGEXIT();

		/* Wakeup signal? */
		if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
		    ghcb->save.sw_exit_info_2)
			break;
	}

	__sev_put_ghcb(&state);
}

/*
 * Play_dead handler when running under SEV-ES. This is needed because
 * the hypervisor can't deliver an SIPI request to restart the AP.
 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
 * hypervisor wakes it up again.
 */
static void sev_es_play_dead(void)
{
	play_dead_common();

	/* IRQs now disabled */

	sev_es_ap_hlt_loop();

	/*
	 * If we get here, the VCPU was woken up again. Jump to CPU
	 * startup code to get it back online.
	 */
	soft_restart_cpu();
}
#else  /* CONFIG_HOTPLUG_CPU */
#define sev_es_play_dead	native_play_dead
#endif /* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_SMP
static void __init sev_es_setup_play_dead(void)
{
	smp_ops.play_dead = sev_es_play_dead;
}
#else
static inline void sev_es_setup_play_dead(void) { }
#endif

static void __init alloc_runtime_data(int cpu)
{
	struct sev_es_runtime_data *data;

	data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu));
	if (!data)
		panic("Can't allocate SEV-ES runtime data");

	per_cpu(runtime_data, cpu) = data;

	if (snp_vmpl) {
		struct svsm_ca *caa;

		/* Allocate the SVSM CA page if an SVSM is present */
		caa = memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE);

		per_cpu(svsm_caa, cpu) = caa;
		per_cpu(svsm_caa_pa, cpu) = __pa(caa);
	}
}

static void __init init_ghcb(int cpu)
{
	struct sev_es_runtime_data *data;
	int err;

	data = per_cpu(runtime_data, cpu);

	err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
					 sizeof(data->ghcb_page));
	if (err)
		panic("Can't map GHCBs unencrypted");

	memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));

	data->ghcb_active = false;
	data->backup_ghcb_active = false;
}

void __init sev_es_init_vc_handling(void)
{
	int cpu;

	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	if (!sev_es_check_cpu_features())
		panic("SEV-ES CPU Features missing");

	/*
	 * SNP is supported in v2 of the GHCB spec which mandates support for HV
	 * features.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
		sev_hv_features = get_hv_features();

		if (!(sev_hv_features & GHCB_HV_FT_SNP))
			sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
	}

	/* Initialize per-cpu GHCB pages */
	for_each_possible_cpu(cpu) {
		alloc_runtime_data(cpu);
		init_ghcb(cpu);
	}

	/* If running under an SVSM, switch to the per-cpu CA */
	if (snp_vmpl) {
		struct svsm_call call = {};
		unsigned long flags;
		int ret;

		local_irq_save(flags);

		/*
		 * SVSM_CORE_REMAP_CA call:
		 *   RAX = 0 (Protocol=0, CallID=0)
		 *   RCX = New CA GPA
		 */
		call.caa = svsm_get_caa();
		call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
		call.rcx = this_cpu_read(svsm_caa_pa);
		ret = svsm_perform_call_protocol(&call);
		if (ret)
			panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n",
			      ret, call.rax_out);

		sev_cfg.use_cas = true;

		local_irq_restore(flags);
	}

	sev_es_setup_play_dead();

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}

static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
{
	int trapnr = ctxt->fi.vector;

	if (trapnr == X86_TRAP_PF)
		native_write_cr2(ctxt->fi.cr2);

	ctxt->regs->orig_ax = ctxt->fi.error_code;
	do_early_exception(ctxt->regs, trapnr);
}

static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
{
	long *reg_array;
	int offset;

	reg_array = (long *)ctxt->regs;
	offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);

	if (offset < 0)
		return NULL;

	offset /= sizeof(long);

	return reg_array + offset;
}

static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
				 unsigned int bytes, bool read)
{
	u64 exit_code, exit_info_1, exit_info_2;
	unsigned long ghcb_pa = __pa(ghcb);
	enum es_result res;
	phys_addr_t paddr;
	void __user *ref;

	ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
	if (ref == (void __user *)-1L)
		return ES_UNSUPPORTED;

	exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;

	res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
	if (res != ES_OK) {
		if (res == ES_EXCEPTION && !read)
			ctxt->fi.error_code |= X86_PF_WRITE;

		return res;
	}

	exit_info_1 = paddr;
	/* Can never be greater than 8 */
	exit_info_2 = bytes;

	ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));

	return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}

/*
 * The MOVS instruction has two memory operands, which raises the
 * problem that it is not known whether the access to the source or the
 * destination caused the #VC exception (and hence whether an MMIO read
 * or write operation needs to be emulated).
 *
 * Instead of playing games with walking page-tables and trying to guess
 * whether the source or destination is an MMIO range, split the move
 * into two operations, a read and a write with only one memory operand.
 * This will cause a nested #VC exception on the MMIO address which can
 * then be handled.
 *
 * This implementation has the benefit that it also supports MOVS where
 * source _and_ destination are MMIO regions.
 *
 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
 * rare operation. If it turns out to be a performance problem the split
 * operations can be moved to memcpy_fromio() and memcpy_toio().
 */
static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
					  unsigned int bytes)
{
	unsigned long ds_base, es_base;
	unsigned char *src, *dst;
	unsigned char buffer[8];
	enum es_result ret;
	bool rep;
	int off;

	ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
	es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

	if (ds_base == -1L || es_base == -1L) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	src = ds_base + (unsigned char *)ctxt->regs->si;
	dst = es_base + (unsigned char *)ctxt->regs->di;

	ret = vc_read_mem(ctxt, src, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	ret = vc_write_mem(ctxt, dst, buffer, bytes);
	if (ret != ES_OK)
		return ret;

	if (ctxt->regs->flags & X86_EFLAGS_DF)
		off = -bytes;
	else
		off = bytes;

	ctxt->regs->si += off;
	ctxt->regs->di += off;

	rep = insn_has_rep_prefix(&ctxt->insn);
	if (rep)
		ctxt->regs->cx -= 1;

	if (!rep || ctxt->regs->cx == 0)
		return ES_OK;
	else
		return ES_RETRY;
}

static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct insn *insn = &ctxt->insn;
	enum insn_mmio_type mmio;
	unsigned int bytes = 0;
	enum es_result ret;
	u8 sign_byte;
	long *reg_data;

	mmio = insn_decode_mmio(insn, &bytes);
	if (mmio == INSN_MMIO_DECODE_FAILED)
		return ES_DECODE_FAILED;

	if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
		reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
		if (!reg_data)
			return ES_DECODE_FAILED;
	}

	if (user_mode(ctxt->regs))
		return ES_UNSUPPORTED;

	switch (mmio) {
	case INSN_MMIO_WRITE:
		memcpy(ghcb->shared_buffer, reg_data, bytes);
		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;
	case INSN_MMIO_WRITE_IMM:
		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;
	case INSN_MMIO_READ:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero-extend for 32-bit operation */
		if (bytes == 4)
			*reg_data = 0;

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_READ_ZERO_EXTEND:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero extend based on operand size */
		memset(reg_data, 0, insn->opnd_bytes);
		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_READ_SIGN_EXTEND:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		if (bytes == 1) {
			u8 *val = (u8 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x80) ? 0xff : 0x00;
		} else {
			u16 *val = (u16 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
		}

		/* Sign extend based on operand size */
		memset(reg_data, sign_byte, insn->opnd_bytes);
		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_MOVS:
		ret = vc_handle_mmio_movs(ctxt, bytes);
		break;
	default:
		ret = ES_UNSUPPORTED;
		break;
	}

	return ret;
}
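/*
 * Note on the MMIO data path (descriptive only): vc_do_mmio() points the GHCB
 * scratch area at ghcb->shared_buffer, so vc_handle_mmio() copies write data
 * (register or immediate) into that buffer before the VMGEXIT and copies read
 * data back out of it into the destination register afterwards, applying zero
 * or sign extension as the decoded instruction requires.
 */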
static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
					  struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long val, *reg = vc_insn_get_rm(ctxt);
	enum es_result ret;

	if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
		return ES_VMM_ERROR;

	if (!reg)
		return ES_DECODE_FAILED;

	val = *reg;

	/* Upper 32 bits must be written as zeroes */
	if (val >> 32) {
		ctxt->fi.vector = X86_TRAP_GP;
		ctxt->fi.error_code = 0;
		return ES_EXCEPTION;
	}

	/* Clear out other reserved bits and set bit 10 */
	val = (val & 0xffff23ffL) | BIT(10);

	/* Early non-zero writes to DR7 are not supported */
	if (!data && (val & ~DR7_RESET_VALUE))
		return ES_UNSUPPORTED;

	/* Using a value of 0 for ExitInfo1 means RAX holds the value */
	ghcb_set_rax(ghcb, val);
	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (data)
		data->dr7 = val;

	return ES_OK;
}

static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
					 struct es_em_ctxt *ctxt)
{
	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
	long *reg = vc_insn_get_rm(ctxt);

	if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
		return ES_VMM_ERROR;

	if (!reg)
		return ES_DECODE_FAILED;

	if (data)
		*reg = data->dr7;
	else
		*reg = DR7_RESET_VALUE;

	return ES_OK;
}

static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
				       struct es_em_ctxt *ctxt)
{
	return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
}

static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rcx(ghcb, ctxt->regs->cx);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;
	ctxt->regs->dx = ghcb->save.rdx;

	return ES_OK;
}

static enum es_result vc_handle_monitor(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Treat it as a NOP and do not leak a physical address to the
	 * hypervisor.
	 */
	return ES_OK;
}

static enum es_result vc_handle_mwait(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt)
{
	/* Treat the same as MONITOR/MONITORX */
	return ES_OK;
}

static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rax(ghcb, ctxt->regs->ax);
	ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rcx(ghcb, ctxt->regs->cx);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;
	ctxt->regs->dx = ghcb->save.rdx;

	return ES_OK;
}

static enum es_result vc_handle_monitor(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Treat it as a NOP and do not leak a physical address to the
	 * hypervisor.
	 */
	return ES_OK;
}

static enum es_result vc_handle_mwait(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt)
{
	/* Treat the same as MONITOR/MONITORX */
	return ES_OK;
}

static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rax(ghcb, ctxt->regs->ax);
	ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);

	if (x86_platform.hyper.sev_es_hcall_prepare)
		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!ghcb_rax_is_valid(ghcb))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;

	/*
	 * Call sev_es_hcall_finish() after regs->ax is already set.
	 * This allows the hypervisor handler to overwrite it again if
	 * necessary.
	 */
	if (x86_platform.hyper.sev_es_hcall_finish &&
	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
		return ES_VMM_ERROR;

	return ES_OK;
}

static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Calling exc_alignment_check() directly does not work, because it
	 * enables IRQs and the GHCB is active. Forward the exception and call
	 * it later from vc_forward_exception().
	 */
	ctxt->fi.vector = X86_TRAP_AC;
	ctxt->fi.error_code = 0;
	return ES_EXCEPTION;
}
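
/*
 * Dispatch a #VC exit code to the matching emulation handler after the
 * opcode bytes at the faulting RIP have been sanity-checked.
 */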
static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
					 struct ghcb *ghcb,
					 unsigned long exit_code)
{
	enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);

	if (result != ES_OK)
		return result;

	switch (exit_code) {
	case SVM_EXIT_READ_DR7:
		result = vc_handle_dr7_read(ghcb, ctxt);
		break;
	case SVM_EXIT_WRITE_DR7:
		result = vc_handle_dr7_write(ghcb, ctxt);
		break;
	case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
		result = vc_handle_trap_ac(ghcb, ctxt);
		break;
	case SVM_EXIT_RDTSC:
	case SVM_EXIT_RDTSCP:
		result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
		break;
	case SVM_EXIT_RDPMC:
		result = vc_handle_rdpmc(ghcb, ctxt);
		break;
	case SVM_EXIT_INVD:
		pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
		result = ES_UNSUPPORTED;
		break;
	case SVM_EXIT_CPUID:
		result = vc_handle_cpuid(ghcb, ctxt);
		break;
	case SVM_EXIT_IOIO:
		result = vc_handle_ioio(ghcb, ctxt);
		break;
	case SVM_EXIT_MSR:
		result = vc_handle_msr(ghcb, ctxt);
		break;
	case SVM_EXIT_VMMCALL:
		result = vc_handle_vmmcall(ghcb, ctxt);
		break;
	case SVM_EXIT_WBINVD:
		result = vc_handle_wbinvd(ghcb, ctxt);
		break;
	case SVM_EXIT_MONITOR:
		result = vc_handle_monitor(ghcb, ctxt);
		break;
	case SVM_EXIT_MWAIT:
		result = vc_handle_mwait(ghcb, ctxt);
		break;
	case SVM_EXIT_NPF:
		result = vc_handle_mmio(ghcb, ctxt);
		break;
	default:
		/*
		 * Unexpected #VC exception
		 */
		result = ES_UNSUPPORTED;
	}

	return result;
}

static __always_inline bool is_vc2_stack(unsigned long sp)
{
	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
}

static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
{
	unsigned long sp, prev_sp;

	sp = (unsigned long)regs;
	prev_sp = regs->sp;

	/*
	 * If the code was already executing on the VC2 stack when the #VC
	 * happened, let it proceed to the normal handling routine. This way the
	 * code executing on the VC2 stack can cause #VC exceptions to get handled.
	 */
	return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
}
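
/*
 * Handle one runtime #VC exception: grab the per-CPU GHCB, emulate the
 * intercepted operation and either finish the instruction, forward a
 * generated exception, or report the failure to the caller.
 */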
static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
{
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	enum es_result result;
	struct ghcb *ghcb;
	bool ret = true;

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	result = vc_init_em_ctxt(&ctxt, regs, error_code);

	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, ghcb, error_code);

	__sev_put_ghcb(&state);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_VMM_ERROR:
		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_DECODE_FAILED:
		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
				   error_code, regs->ip);
		ret = false;
		break;
	case ES_EXCEPTION:
		vc_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		pr_emerg("Unknown result in %s():%d\n", __func__, result);
		/*
		 * Emulating the instruction which caused the #VC exception
		 * failed - can't continue so print debug information
		 */
		BUG();
	}

	return ret;
}

static __always_inline bool vc_is_db(unsigned long error_code)
{
	return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
}

/*
 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
 * and will panic when an error happens.
 */
DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
{
	irqentry_state_t irq_state;

	/*
	 * With the current implementation it is always possible to switch to a
	 * safe stack because #VC exceptions only happen at known places, like
	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
	 * also happen with code instrumentation when the hypervisor intercepts
	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
	 * exceptions currently also only happen in safe places.
	 *
	 * But keep this here in case the noinstr annotations are violated due
	 * to a bug elsewhere.
	 */
	if (unlikely(vc_from_invalid_context(regs))) {
		instrumentation_begin();
		panic("Can't handle #VC exception from unsupported context\n");
		instrumentation_end();
	}

	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		exc_debug(regs);
		return;
	}

	irq_state = irqentry_nmi_enter(regs);

	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/* Show some debug info */
		show_regs(regs);

		/* Ask hypervisor to sev_es_terminate */
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);

		/* If that fails and we get here - just panic */
		panic("Returned from Terminate-Request to Hypervisor\n");
	}

	instrumentation_end();
	irqentry_nmi_exit(regs, irq_state);
}

/*
 * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
 * and will kill the current task with SIGBUS when an error happens.
 */
DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
{
	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		noist_exc_debug(regs);
		return;
	}

	irqentry_enter_from_user_mode(regs);
	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/*
		 * Do not kill the machine if user-space triggered the
		 * exception. Send SIGBUS instead and let user-space deal with
		 * it.
		 */
		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
	}

	instrumentation_end();
	irqentry_exit_to_user_mode(regs);
}
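
/*
 * #VC handler used during early boot, before the runtime per-CPU GHCBs are
 * set up. It emulates the intercepted operation using the statically
 * allocated boot GHCB and terminates the guest on any failure.
 */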
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
	unsigned long exit_code = regs->orig_ax;
	struct es_em_ctxt ctxt;
	enum es_result result;

	vc_ghcb_invalidate(boot_ghcb);

	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
			     exit_code, regs->ip);
		goto fail;
	case ES_VMM_ERROR:
		early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
			     exit_code, regs->ip);
		goto fail;
	case ES_DECODE_FAILED:
		early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
			     exit_code, regs->ip);
		goto fail;
	case ES_EXCEPTION:
		vc_early_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		BUG();
	}

	return true;

fail:
	show_regs(regs);

	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}

/*
 * Initial set up of SNP relies on information provided by the
 * Confidential Computing blob, which can be passed to the kernel
 * in the following ways, depending on how it is booted:
 *
 * - when booted via the boot/decompress kernel:
 *   - via boot_params
 *
 * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH):
 *   - via a setup_data entry, as defined by the Linux Boot Protocol
 *
 * Scan for the blob in that order.
 */
static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
{
	struct cc_blob_sev_info *cc_info;

	/* Boot kernel would have passed the CC blob via boot_params. */
	if (bp->cc_blob_address) {
		cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address;
		goto found_cc_info;
	}

	/*
	 * If kernel was booted directly, without the use of the
	 * boot/decompression kernel, the CC blob may have been passed via
	 * setup_data instead.
	 */
	cc_info = find_cc_blob_setup_data(bp);
	if (!cc_info)
		return NULL;

found_cc_info:
	if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
		snp_abort();

	return cc_info;
}

static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
{
	struct svsm_call call = {};
	int ret;
	u64 pa;

	/*
	 * Record the SVSM Calling Area address (CAA) if the guest is not
	 * running at VMPL0. The CA will be used to communicate with the
	 * SVSM to perform the SVSM services.
	 */
	if (!svsm_setup_ca(cc_info))
		return;

	/*
	 * It is very early in the boot and the kernel is running identity
	 * mapped but without having adjusted the pagetables to where the
	 * kernel was loaded (physbase), so get the CA address using
	 * RIP-relative addressing.
	 */
	pa = (u64)&RIP_REL_REF(boot_svsm_ca_page);

	/*
	 * Switch over to the boot SVSM CA while the current CA is still
	 * addressable. There is no GHCB at this point so use the MSR protocol.
	 *
	 * SVSM_CORE_REMAP_CA call:
	 *   RAX = 0 (Protocol=0, CallID=0)
	 *   RCX = New CA GPA
	 */
	call.caa = svsm_get_caa();
	call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
	call.rcx = pa;
	ret = svsm_perform_call_protocol(&call);
	if (ret)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL);

	RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa;
	RIP_REL_REF(boot_svsm_caa_pa) = pa;
}
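
/*
 * Probe for SNP at boot: locate the CC blob, record the secrets page
 * address, set up the SNP CPUID table and, if present, the SVSM calling
 * area. Returns false when no usable CC blob is found.
 */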
bool __head snp_init(struct boot_params *bp)
{
	struct cc_blob_sev_info *cc_info;

	if (!bp)
		return false;

	cc_info = find_cc_blob(bp);
	if (!cc_info)
		return false;

	if (cc_info->secrets_phys && cc_info->secrets_len == PAGE_SIZE)
		secrets_pa = cc_info->secrets_phys;
	else
		return false;

	setup_cpuid_table(cc_info);

	svsm_setup(cc_info);

	/*
	 * The CC blob will be used later to access the secrets page. Cache
	 * it here like the boot kernel does.
	 */
	bp->cc_blob_address = (u32)(unsigned long)cc_info;

	return true;
}

void __head __noreturn snp_abort(void)
{
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}

/*
 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
 * enabled, as the alternative (fallback) logic for DMI probing in the legacy
 * ROM region can cause a crash since this region is not pre-validated.
 */
void __init snp_dmi_setup(void)
{
	if (efi_enabled(EFI_CONFIG_TABLES))
		dmi_setup();
}

static void dump_cpuid_table(void)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	int i = 0;

	pr_info("count=%d reserved=0x%x reserved2=0x%llx\n",
		cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2);

	for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) {
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n",
			i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx,
			fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved);
	}
}

/*
 * It is useful from an auditing/testing perspective to provide an easy way
 * for the guest owner to know that the CPUID table has been initialized as
 * expected, but that initialization happens too early in boot to print any
 * sort of indicator, and there's not really any other good place to do it,
 * so do it here.
 *
 * If running as an SNP guest, report the current VM privilege level (VMPL).
 */
static int __init report_snp_info(void)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	if (cpuid_table->count) {
		pr_info("Using SNP CPUID table, %d entries present.\n",
			cpuid_table->count);

		if (sev_cfg.debug)
			dump_cpuid_table();
	}

	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		pr_info("SNP running at VMPL%u.\n", snp_vmpl);

	return 0;
}
arch_initcall(report_snp_info);

static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input)
{
	/* If (new) lengths have been returned, propagate them up */
	if (call->rcx_out != call->rcx)
		input->manifest_buf.len = call->rcx_out;

	if (call->rdx_out != call->rdx)
		input->certificates_buf.len = call->rdx_out;

	if (call->r8_out != call->r8)
		input->report_buf.len = call->r8_out;
}
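
/*
 * Issue an SVSM attestation request. The request parameters are copied into
 * the SVSM calling-area buffer and the call is made with IRQs disabled since
 * the per-CPU CAA is used. Only valid when running under an SVSM, i.e. at a
 * VMPL other than 0.
 */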
int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call,
			      struct svsm_attest_call *input)
{
	struct svsm_attest_call *ac;
	unsigned long flags;
	u64 attest_call_pa;
	int ret;

	if (!snp_vmpl)
		return -EINVAL;

	local_irq_save(flags);

	call->caa = svsm_get_caa();

	ac = (struct svsm_attest_call *)call->caa->svsm_buffer;
	attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);

	*ac = *input;

	/*
	 * Set input registers for the request and set RDX and R8 to known
	 * values in order to detect length values being returned in them.
	 */
	call->rax = call_id;
	call->rcx = attest_call_pa;
	call->rdx = -1;
	call->r8 = -1;
	ret = svsm_perform_call_protocol(call);
	update_attest_input(call, input);

	local_irq_restore(flags);

	return ret;
}
EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req);
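
/*
 * Issue a single SNP guest request to the hypervisor through the per-CPU
 * GHCB and translate the returned exit info into an errno. For extended
 * requests the certificate data GPA and page count are passed in RAX/RBX.
 */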
static int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input,
				   struct snp_guest_request_ioctl *rio)
{
	struct ghcb_state state;
	struct es_em_ctxt ctxt;
	unsigned long flags;
	struct ghcb *ghcb;
	int ret;

	rio->exitinfo2 = SEV_RET_NO_FW_CALL;

	/*
	 * __sev_get_ghcb() needs to run with IRQs disabled because it is using
	 * a per-CPU GHCB.
	 */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);
	if (!ghcb) {
		ret = -EIO;
		goto e_restore_irq;
	}

	vc_ghcb_invalidate(ghcb);

	if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
		ghcb_set_rax(ghcb, input->data_gpa);
		ghcb_set_rbx(ghcb, input->data_npages);
	}

	ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa);
	if (ret)
		goto e_put;

	rio->exitinfo2 = ghcb->save.sw_exit_info_2;
	switch (rio->exitinfo2) {
	case 0:
		break;

	case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
		ret = -EAGAIN;
		break;

	case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
		/* Number of expected pages are returned in RBX */
		if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
			input->data_npages = ghcb_get_rbx(ghcb);
			ret = -ENOSPC;
			break;
		}
		fallthrough;
	default:
		ret = -EIO;
		break;
	}

e_put:
	__sev_put_ghcb(&state);
e_restore_irq:
	local_irq_restore(flags);

	return ret;
}

static struct platform_device sev_guest_device = {
	.name = "sev-guest",
	.id = -1,
};

static int __init snp_init_platform_device(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return -ENODEV;

	if (platform_device_register(&sev_guest_device))
		return -ENODEV;

	pr_info("SNP guest platform device initialized.\n");
	return 0;
}
device_initcall(snp_init_platform_device);

void sev_show_status(void)
{
	int i;

	pr_info("Status: ");
	for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) {
		if (sev_status & BIT_ULL(i)) {
			if (!sev_status_feat_names[i])
				continue;

			pr_cont("%s ", sev_status_feat_names[i]);
		}
	}
	pr_cont("\n");
}

void __init snp_update_svsm_ca(void)
{
	if (!snp_vmpl)
		return;

	/* Update the CAA to a proper kernel address */
	boot_svsm_caa = &boot_svsm_ca_page;
}

#ifdef CONFIG_SYSFS
static ssize_t vmpl_show(struct kobject *kobj,
			 struct kobj_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", snp_vmpl);
}

static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl);

static struct attribute *vmpl_attrs[] = {
	&vmpl_attr.attr,
	NULL
};

static struct attribute_group sev_attr_group = {
	.attrs = vmpl_attrs,
};

static int __init sev_sysfs_init(void)
{
	struct kobject *sev_kobj;
	struct device *dev_root;
	int ret;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return -ENODEV;

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (!dev_root)
		return -ENODEV;

	sev_kobj = kobject_create_and_add("sev", &dev_root->kobj);
	put_device(dev_root);

	if (!sev_kobj)
		return -ENOMEM;

	ret = sysfs_create_group(sev_kobj, &sev_attr_group);
	if (ret)
		kobject_put(sev_kobj);

	return ret;
}
arch_initcall(sev_sysfs_init);
#endif // CONFIG_SYSFS
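
/*
 * Helpers for memory shared with the hypervisor: pages are flipped between
 * the private (encrypted) and shared (decrypted) state with
 * set_memory_decrypted()/set_memory_encrypted() around their use.
 */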
static void free_shared_pages(void *buf, size_t sz)
{
	unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
	int ret;

	if (!buf)
		return;

	ret = set_memory_encrypted((unsigned long)buf, npages);
	if (ret) {
		WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n");
		return;
	}

	__free_pages(virt_to_page(buf), get_order(sz));
}

static void *alloc_shared_pages(size_t sz)
{
	unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
	struct page *page;
	int ret;

	page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz));
	if (!page)
		return NULL;

	ret = set_memory_decrypted((unsigned long)page_address(page), npages);
	if (ret) {
		pr_err("failed to mark page shared, ret=%d\n", ret);
		__free_pages(page, get_order(sz));
		return NULL;
	}

	return page_address(page);
}

static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno)
{
	u8 *key = NULL;

	switch (id) {
	case 0:
		*seqno = &secrets->os_area.msg_seqno_0;
		key = secrets->vmpck0;
		break;
	case 1:
		*seqno = &secrets->os_area.msg_seqno_1;
		key = secrets->vmpck1;
		break;
	case 2:
		*seqno = &secrets->os_area.msg_seqno_2;
		key = secrets->vmpck2;
		break;
	case 3:
		*seqno = &secrets->os_area.msg_seqno_3;
		key = secrets->vmpck3;
		break;
	default:
		break;
	}

	return key;
}

static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen)
{
	struct aesgcm_ctx *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) {
		pr_err("Crypto context initialization failed\n");
		kfree(ctx);
		return NULL;
	}

	return ctx;
}

int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id)
{
	/* Adjust the default VMPCK key based on the executing VMPL level */
	if (vmpck_id == -1)
		vmpck_id = snp_vmpl;

	mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno);
	if (!mdesc->vmpck) {
		pr_err("Invalid VMPCK%d communication key\n", vmpck_id);
		return -EINVAL;
	}

	/* Verify that VMPCK is not zero. */
	if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
		pr_err("Empty VMPCK%d communication key\n", vmpck_id);
		return -EINVAL;
	}

	mdesc->vmpck_id = vmpck_id;

	mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN);
	if (!mdesc->ctx)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(snp_msg_init);
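
/*
 * Allocate an SNP message descriptor: map the secrets page and allocate the
 * shared (decrypted) pages used for the request and response messages.
 */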
struct snp_msg_desc *snp_msg_alloc(void)
{
	struct snp_msg_desc *mdesc;
	void __iomem *mem;

	BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE);

	mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL);
	if (!mdesc)
		return ERR_PTR(-ENOMEM);

	mem = ioremap_encrypted(secrets_pa, PAGE_SIZE);
	if (!mem)
		goto e_free_mdesc;

	mdesc->secrets = (__force struct snp_secrets_page *)mem;

	/* Allocate the shared pages used for the request and response messages. */
	mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg));
	if (!mdesc->request)
		goto e_unmap;

	mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg));
	if (!mdesc->response)
		goto e_free_request;

	return mdesc;

e_free_request:
	free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
e_unmap:
	iounmap(mem);
e_free_mdesc:
	kfree(mdesc);

	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(snp_msg_alloc);

void snp_msg_free(struct snp_msg_desc *mdesc)
{
	if (!mdesc)
		return;

	kfree(mdesc->ctx);
	free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
	free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
	iounmap((__force void __iomem *)mdesc->secrets);

	memset(mdesc, 0, sizeof(*mdesc));
	kfree(mdesc);
}
EXPORT_SYMBOL_GPL(snp_msg_free);

/* Mutex to serialize the shared buffer access and command handling. */
static DEFINE_MUTEX(snp_cmd_mutex);

/*
 * If an error is received from the host or AMD Secure Processor (ASP) there
 * are two options. Either retry the exact same encrypted request or discontinue
 * using the VMPCK.
 *
 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to
 * encrypt the requests. The IV for this scheme is the sequence number. GCM
 * cannot tolerate IV reuse.
 *
 * The ASP FW v1.51 only increments the sequence numbers on a successful
 * guest<->ASP back and forth and only accepts messages at its exact sequence
 * number.
 *
 * So if the sequence number were to be reused the encryption scheme is
 * vulnerable. If the sequence number were incremented for a fresh IV the ASP
 * will reject the request.
 */
static void snp_disable_vmpck(struct snp_msg_desc *mdesc)
{
	pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n",
		 mdesc->vmpck_id);
	memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN);
	mdesc->vmpck = NULL;
}

static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
	u64 count;

	lockdep_assert_held(&snp_cmd_mutex);

	/* Read the current message sequence counter from secrets pages */
	count = *mdesc->os_area_msg_seqno;

	return count + 1;
}

/* Return a non-zero sequence number on success */
static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc)
{
	u64 count = __snp_get_msg_seqno(mdesc);

	/*
	 * The message sequence counter for the SNP guest request is a 64-bit
	 * value but version 2 of the GHCB specification defines a 32-bit storage
	 * for it. If the counter exceeds the 32-bit value then return zero.
	 * The caller should check the return value, but if the caller happens to
	 * not check the value and use it, then the firmware treats zero as an
	 * invalid number and will fail the message request.
	 */
	if (count >= UINT_MAX) {
		pr_err("request message sequence counter overflow\n");
		return 0;
	}

	return count;
}

static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc)
{
	/*
	 * The counter is also incremented by the PSP, so increment it by 2
	 * and save in secrets page.
	 */
	*mdesc->os_area_msg_seqno += 2;
}
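
/*
 * Copy the response out of shared memory, validate its header against the
 * request and decrypt the payload into the caller's response buffer.
 */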
static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req)
{
	struct snp_guest_msg *resp_msg = &mdesc->secret_response;
	struct snp_guest_msg *req_msg = &mdesc->secret_request;
	struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr;
	struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr;
	struct aesgcm_ctx *ctx = mdesc->ctx;
	u8 iv[GCM_AES_IV_SIZE] = {};

	pr_debug("response [seqno %lld type %d version %d sz %d]\n",
		 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version,
		 resp_msg_hdr->msg_sz);

	/* Copy response from shared memory to encrypted memory. */
	memcpy(resp_msg, mdesc->response, sizeof(*resp_msg));

	/* Verify that the sequence counter is incremented by 1 */
	if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1)))
		return -EBADMSG;

	/* Verify response message type and version number. */
	if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) ||
	    resp_msg_hdr->msg_version != req_msg_hdr->msg_version)
		return -EBADMSG;

	/*
	 * If the message size is greater than our buffer length then return
	 * an error.
	 */
	if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz))
		return -EBADMSG;

	/* Decrypt the payload */
	memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno)));
	if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz,
			    &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag))
		return -EBADMSG;

	return 0;
}

static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req)
{
	struct snp_guest_msg *msg = &mdesc->secret_request;
	struct snp_guest_msg_hdr *hdr = &msg->hdr;
	struct aesgcm_ctx *ctx = mdesc->ctx;
	u8 iv[GCM_AES_IV_SIZE] = {};

	memset(msg, 0, sizeof(*msg));

	hdr->algo = SNP_AEAD_AES_256_GCM;
	hdr->hdr_version = MSG_HDR_VER;
	hdr->hdr_sz = sizeof(*hdr);
	hdr->msg_type = req->msg_type;
	hdr->msg_version = req->msg_version;
	hdr->msg_seqno = seqno;
	hdr->msg_vmpck = req->vmpck_id;
	hdr->msg_sz = req->req_sz;

	/* Verify the sequence number is non-zero */
	if (!hdr->msg_seqno)
		return -ENOSR;

	pr_debug("request [seqno %lld type %d version %d sz %d]\n",
		 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz);

	if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload)))
		return -EBADMSG;

	memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno)));
	aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo,
		       AAD_LEN, iv, hdr->authtag);

	return 0;
}

static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
				  struct snp_guest_request_ioctl *rio)
{
	unsigned long req_start = jiffies;
	unsigned int override_npages = 0;
	u64 override_err = 0;
	int rc;

retry_request:
	/*
	 * Call firmware to process the request. In this function the encrypted
	 * message enters shared memory with the host. So after this call the
	 * sequence number must be incremented or the VMPCK must be deleted to
	 * prevent reuse of the IV.
	 */
	rc = snp_issue_guest_request(req, &req->input, rio);
	switch (rc) {
	case -ENOSPC:
		/*
		 * If the extended guest request fails due to having too
		 * small of a certificate data buffer, retry the same
		 * guest request without the extended data request in
		 * order to increment the sequence number and thus avoid
		 * IV reuse.
		 */
		override_npages = req->input.data_npages;
		req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;

		/*
		 * Override the error to inform callers the given extended
		 * request buffer size was too small and give the caller the
		 * required buffer size.
		 */
		override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN);

		/*
		 * If this call to the firmware succeeds, the sequence number can
		 * be incremented allowing for continued use of the VMPCK. If
		 * there is an error reflected in the return value, this value
		 * is checked further down and the result will be the deletion
		 * of the VMPCK and the error code being propagated back to the
		 * user as an ioctl() return code.
		 */
		goto retry_request;

	/*
	 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been
	 * throttled. Retry in the driver to avoid returning and reusing the
	 * message sequence number on a different message.
	 */
	case -EAGAIN:
		if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
			rc = -ETIMEDOUT;
			break;
		}
		schedule_timeout_killable(SNP_REQ_RETRY_DELAY);
		goto retry_request;
	}

	/*
	 * Increment the message sequence number. There is no harm in doing
	 * this now because decryption uses the value stored in the response
	 * structure and any failure will wipe the VMPCK, preventing further
	 * use anyway.
	 */
	snp_inc_msg_seqno(mdesc);

	if (override_err) {
		rio->exitinfo2 = override_err;

		/*
		 * If an extended guest request was issued and the supplied certificate
		 * buffer was not large enough, a standard guest request was issued to
		 * prevent IV reuse. If the standard request was successful, return -EIO
		 * back to the caller as would have originally been returned.
		 */
		if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			rc = -EIO;
	}

	if (override_npages)
		req->input.data_npages = override_npages;

	return rc;
}
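
/*
 * Send one encrypted guest message to the ASP and decrypt the response.
 * Serialized by snp_cmd_mutex; on unexpected failure the VMPCK is wiped so
 * that the AES-GCM IV (the message sequence number) can never be reused.
 */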
int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
			   struct snp_guest_request_ioctl *rio)
{
	u64 seqno;
	int rc;

	guard(mutex)(&snp_cmd_mutex);

	/* Check if the VMPCK is not empty */
	if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) {
		pr_err_ratelimited("VMPCK is disabled\n");
		return -ENOTTY;
	}

	/* Get the message sequence number and verify that it is non-zero */
	seqno = snp_get_msg_seqno(mdesc);
	if (!seqno)
		return -EIO;

	/* Clear shared memory's response for the host to populate. */
	memset(mdesc->response, 0, sizeof(struct snp_guest_msg));

	/* Encrypt the userspace provided payload in mdesc->secret_request. */
	rc = enc_payload(mdesc, seqno, req);
	if (rc)
		return rc;

	/*
	 * Write the fully encrypted request to the shared unencrypted
	 * request page.
	 */
	memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request));

	/* Initialize the input address for guest request */
	req->input.req_gpa = __pa(mdesc->request);
	req->input.resp_gpa = __pa(mdesc->response);
	req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0;

	rc = __handle_guest_request(mdesc, req, rio);
	if (rc) {
		if (rc == -EIO &&
		    rio->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN))
			return rc;

		pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n",
			 rc, rio->exitinfo2);

		snp_disable_vmpck(mdesc);
		return rc;
	}

	rc = verify_and_dec_payload(mdesc, req);
	if (rc) {
		pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc);
		snp_disable_vmpck(mdesc);
		return rc;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(snp_send_guest_request);

static int __init snp_get_tsc_info(void)
{
	struct snp_guest_request_ioctl *rio;
	struct snp_tsc_info_resp *tsc_resp;
	struct snp_tsc_info_req *tsc_req;
	struct snp_msg_desc *mdesc;
	struct snp_guest_req *req;
	int rc = -ENOMEM;

	tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL);
	if (!tsc_req)
		return rc;

	/*
	 * The intermediate response buffer is used while decrypting the
	 * response payload. Make sure that it has enough space to cover
	 * the authtag.
	 */
	tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL);
	if (!tsc_resp)
		goto e_free_tsc_req;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (!req)
		goto e_free_tsc_resp;

	rio = kzalloc(sizeof(*rio), GFP_KERNEL);
	if (!rio)
		goto e_free_req;

	mdesc = snp_msg_alloc();
	if (IS_ERR_OR_NULL(mdesc))
		goto e_free_rio;

	rc = snp_msg_init(mdesc, snp_vmpl);
	if (rc)
		goto e_free_mdesc;

	req->msg_version = MSG_HDR_VER;
	req->msg_type = SNP_MSG_TSC_INFO_REQ;
	req->vmpck_id = snp_vmpl;
	req->req_buf = tsc_req;
	req->req_sz = sizeof(*tsc_req);
	req->resp_buf = (void *)tsc_resp;
	req->resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN;
	req->exit_code = SVM_VMGEXIT_GUEST_REQUEST;

	rc = snp_send_guest_request(mdesc, req, rio);
	if (rc)
		goto e_request;

	pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n",
		 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset,
		 tsc_resp->tsc_factor);

	if (!tsc_resp->status) {
		snp_tsc_scale = tsc_resp->tsc_scale;
		snp_tsc_offset = tsc_resp->tsc_offset;
	} else {
		pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status);
		rc = -EIO;
	}

e_request:
	/* The response buffer contains sensitive data, explicitly clear it. */
	memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN);
e_free_mdesc:
	snp_msg_free(mdesc);
e_free_rio:
	kfree(rio);
e_free_req:
	kfree(req);
e_free_tsc_resp:
	kfree(tsc_resp);
e_free_tsc_req:
	kfree(tsc_req);

	return rc;
}
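
/*
 * Fetch the Secure TSC scale and offset from the ASP early during boot.
 * A Secure TSC guest cannot continue without them, so failure is fatal.
 */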
void __init snp_secure_tsc_prepare(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	if (snp_get_tsc_info()) {
		pr_alert("Unable to retrieve Secure TSC info from ASP\n");
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC);
	}

	pr_debug("SecureTSC enabled");
}

static unsigned long securetsc_get_tsc_khz(void)
{
	return snp_tsc_freq_khz;
}

void __init snp_secure_tsc_init(void)
{
	unsigned long long tsc_freq_mhz;

	if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC))
		return;

	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz);
	snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000);

	x86_platform.calibrate_cpu = securetsc_get_tsc_khz;
	x86_platform.calibrate_tsc = securetsc_get_tsc_khz;
}