1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * AMD Memory Encryption Support 4 * 5 * Copyright (C) 2019 SUSE 6 * 7 * Author: Joerg Roedel <jroedel@suse.de> 8 */ 9 10 #define pr_fmt(fmt) "SEV: " fmt 11 12 #include <linux/sched/debug.h> /* For show_regs() */ 13 #include <linux/percpu-defs.h> 14 #include <linux/cc_platform.h> 15 #include <linux/printk.h> 16 #include <linux/mm_types.h> 17 #include <linux/set_memory.h> 18 #include <linux/memblock.h> 19 #include <linux/kernel.h> 20 #include <linux/mm.h> 21 #include <linux/cpumask.h> 22 #include <linux/efi.h> 23 #include <linux/platform_device.h> 24 #include <linux/io.h> 25 #include <linux/psp-sev.h> 26 #include <linux/dmi.h> 27 #include <uapi/linux/sev-guest.h> 28 #include <crypto/gcm.h> 29 30 #include <asm/init.h> 31 #include <asm/cpu_entry_area.h> 32 #include <asm/stacktrace.h> 33 #include <asm/sev.h> 34 #include <asm/insn-eval.h> 35 #include <asm/fpu/xcr.h> 36 #include <asm/processor.h> 37 #include <asm/realmode.h> 38 #include <asm/setup.h> 39 #include <asm/traps.h> 40 #include <asm/svm.h> 41 #include <asm/smp.h> 42 #include <asm/cpu.h> 43 #include <asm/apic.h> 44 #include <asm/cpuid.h> 45 #include <asm/cmdline.h> 46 #include <asm/msr.h> 47 48 #define DR7_RESET_VALUE 0x400 49 50 /* AP INIT values as documented in the APM2 section "Processor Initialization State" */ 51 #define AP_INIT_CS_LIMIT 0xffff 52 #define AP_INIT_DS_LIMIT 0xffff 53 #define AP_INIT_LDTR_LIMIT 0xffff 54 #define AP_INIT_GDTR_LIMIT 0xffff 55 #define AP_INIT_IDTR_LIMIT 0xffff 56 #define AP_INIT_TR_LIMIT 0xffff 57 #define AP_INIT_RFLAGS_DEFAULT 0x2 58 #define AP_INIT_DR6_DEFAULT 0xffff0ff0 59 #define AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL 60 #define AP_INIT_XCR0_DEFAULT 0x1 61 #define AP_INIT_X87_FTW_DEFAULT 0x5555 62 #define AP_INIT_X87_FCW_DEFAULT 0x0040 63 #define AP_INIT_CR0_DEFAULT 0x60000010 64 #define AP_INIT_MXCSR_DEFAULT 0x1f80 65 66 static const char * const sev_status_feat_names[] = { 67 [MSR_AMD64_SEV_ENABLED_BIT] = "SEV", 68 [MSR_AMD64_SEV_ES_ENABLED_BIT] = "SEV-ES", 69 [MSR_AMD64_SEV_SNP_ENABLED_BIT] = "SEV-SNP", 70 [MSR_AMD64_SNP_VTOM_BIT] = "vTom", 71 [MSR_AMD64_SNP_REFLECT_VC_BIT] = "ReflectVC", 72 [MSR_AMD64_SNP_RESTRICTED_INJ_BIT] = "RI", 73 [MSR_AMD64_SNP_ALT_INJ_BIT] = "AI", 74 [MSR_AMD64_SNP_DEBUG_SWAP_BIT] = "DebugSwap", 75 [MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT] = "NoHostIBS", 76 [MSR_AMD64_SNP_BTB_ISOLATION_BIT] = "BTBIsol", 77 [MSR_AMD64_SNP_VMPL_SSS_BIT] = "VmplSSS", 78 [MSR_AMD64_SNP_SECURE_TSC_BIT] = "SecureTSC", 79 [MSR_AMD64_SNP_VMGEXIT_PARAM_BIT] = "VMGExitParam", 80 [MSR_AMD64_SNP_IBS_VIRT_BIT] = "IBSVirt", 81 [MSR_AMD64_SNP_VMSA_REG_PROT_BIT] = "VMSARegProt", 82 [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt", 83 }; 84 85 /* For early boot hypervisor communication in SEV-ES enabled guests */ 86 static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); 87 88 /* 89 * Needs to be in the .data section because we need it NULL before bss is 90 * cleared 91 */ 92 static struct ghcb *boot_ghcb __section(".data"); 93 94 /* Bitmap of SEV features supported by the hypervisor */ 95 static u64 sev_hv_features __ro_after_init; 96 97 /* Secrets page physical address from the CC blob */ 98 static u64 secrets_pa __ro_after_init; 99 100 /* 101 * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and 102 * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated 103 * across the APs VMSA fields (TSC_SCALE and TSC_OFFSET). 
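 *
 * (The hardware derives the guest-visible TSC from this scale/offset pair,
 * so the APs must carry the same values the BSP obtained via the TSC_INFO
 * guest message; otherwise their TSC would diverge from the BSP's.)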
104 */ 105 static u64 snp_tsc_scale __ro_after_init; 106 static u64 snp_tsc_offset __ro_after_init; 107 static u64 snp_tsc_freq_khz __ro_after_init; 108 109 /* #VC handler runtime per-CPU data */ 110 struct sev_es_runtime_data { 111 struct ghcb ghcb_page; 112 113 /* 114 * Reserve one page per CPU as backup storage for the unencrypted GHCB. 115 * It is needed when an NMI happens while the #VC handler uses the real 116 * GHCB, and the NMI handler itself is causing another #VC exception. In 117 * that case the GHCB content of the first handler needs to be backed up 118 * and restored. 119 */ 120 struct ghcb backup_ghcb; 121 122 /* 123 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions. 124 * There is no need for it to be atomic, because nothing is written to 125 * the GHCB between the read and the write of ghcb_active. So it is safe 126 * to use it when a nested #VC exception happens before the write. 127 * 128 * This is necessary for example in the #VC->NMI->#VC case when the NMI 129 * happens while the first #VC handler uses the GHCB. When the NMI code 130 * raises a second #VC handler it might overwrite the contents of the 131 * GHCB written by the first handler. To avoid this the content of the 132 * GHCB is saved and restored when the GHCB is detected to be in use 133 * already. 134 */ 135 bool ghcb_active; 136 bool backup_ghcb_active; 137 138 /* 139 * Cached DR7 value - write it on DR7 writes and return it on reads. 140 * That value will never make it to the real hardware DR7 as debugging 141 * is currently unsupported in SEV-ES guests. 142 */ 143 unsigned long dr7; 144 }; 145 146 struct ghcb_state { 147 struct ghcb *ghcb; 148 }; 149 150 /* For early boot SVSM communication */ 151 static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE); 152 153 static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); 154 static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); 155 static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa); 156 static DEFINE_PER_CPU(u64, svsm_caa_pa); 157 158 static __always_inline bool on_vc_stack(struct pt_regs *regs) 159 { 160 unsigned long sp = regs->sp; 161 162 /* User-mode RSP is not trusted */ 163 if (user_mode(regs)) 164 return false; 165 166 /* SYSCALL gap still has user-mode RSP */ 167 if (ip_within_syscall_gap(regs)) 168 return false; 169 170 return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); 171 } 172 173 /* 174 * This function handles the case when an NMI is raised in the #VC 175 * exception handler entry code, before the #VC handler has switched off 176 * its IST stack. In this case, the IST entry for #VC must be adjusted, 177 * so that any nested #VC exception will not overwrite the stack 178 * contents of the interrupted #VC handler. 179 * 180 * The IST entry is adjusted unconditionally so that it can be also be 181 * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a 182 * nested sev_es_ist_exit() call may adjust back the IST entry too 183 * early. 184 * 185 * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run 186 * on the NMI IST stack, as they are only called from NMI handling code 187 * right now. 
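 *
 * Illustrative sequence (see the code below):
 *
 *   enter: new IST = (regs->sp if the NMI hit the #VC IST stack, else the
 *          old IST value) minus 8 bytes, with the old IST value stored in
 *          that newly reserved slot
 *   exit:  IST restored from the value stored in that slot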
188 */ 189 void noinstr __sev_es_ist_enter(struct pt_regs *regs) 190 { 191 unsigned long old_ist, new_ist; 192 193 /* Read old IST entry */ 194 new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); 195 196 /* 197 * If NMI happened while on the #VC IST stack, set the new IST 198 * value below regs->sp, so that the interrupted stack frame is 199 * not overwritten by subsequent #VC exceptions. 200 */ 201 if (on_vc_stack(regs)) 202 new_ist = regs->sp; 203 204 /* 205 * Reserve additional 8 bytes and store old IST value so this 206 * adjustment can be unrolled in __sev_es_ist_exit(). 207 */ 208 new_ist -= sizeof(old_ist); 209 *(unsigned long *)new_ist = old_ist; 210 211 /* Set new IST entry */ 212 this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist); 213 } 214 215 void noinstr __sev_es_ist_exit(void) 216 { 217 unsigned long ist; 218 219 /* Read IST entry */ 220 ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); 221 222 if (WARN_ON(ist == __this_cpu_ist_top_va(VC))) 223 return; 224 225 /* Read back old IST entry and write it to the TSS */ 226 this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist); 227 } 228 229 /* 230 * Nothing shall interrupt this code path while holding the per-CPU 231 * GHCB. The backup GHCB is only for NMIs interrupting this path. 232 * 233 * Callers must disable local interrupts around it. 234 */ 235 static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) 236 { 237 struct sev_es_runtime_data *data; 238 struct ghcb *ghcb; 239 240 WARN_ON(!irqs_disabled()); 241 242 data = this_cpu_read(runtime_data); 243 ghcb = &data->ghcb_page; 244 245 if (unlikely(data->ghcb_active)) { 246 /* GHCB is already in use - save its contents */ 247 248 if (unlikely(data->backup_ghcb_active)) { 249 /* 250 * Backup-GHCB is also already in use. There is no way 251 * to continue here so just kill the machine. To make 252 * panic() work, mark GHCBs inactive so that messages 253 * can be printed out. 254 */ 255 data->ghcb_active = false; 256 data->backup_ghcb_active = false; 257 258 instrumentation_begin(); 259 panic("Unable to handle #VC exception! 
GHCB and Backup GHCB are already in use"); 260 instrumentation_end(); 261 } 262 263 /* Mark backup_ghcb active before writing to it */ 264 data->backup_ghcb_active = true; 265 266 state->ghcb = &data->backup_ghcb; 267 268 /* Backup GHCB content */ 269 *state->ghcb = *ghcb; 270 } else { 271 state->ghcb = NULL; 272 data->ghcb_active = true; 273 } 274 275 return ghcb; 276 } 277 278 static inline u64 sev_es_rd_ghcb_msr(void) 279 { 280 return native_rdmsrq(MSR_AMD64_SEV_ES_GHCB); 281 } 282 283 static __always_inline void sev_es_wr_ghcb_msr(u64 val) 284 { 285 u32 low, high; 286 287 low = (u32)(val); 288 high = (u32)(val >> 32); 289 290 native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); 291 } 292 293 static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt, 294 unsigned char *buffer) 295 { 296 return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE); 297 } 298 299 static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt) 300 { 301 char buffer[MAX_INSN_SIZE]; 302 int insn_bytes; 303 304 insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer); 305 if (insn_bytes == 0) { 306 /* Nothing could be copied */ 307 ctxt->fi.vector = X86_TRAP_PF; 308 ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; 309 ctxt->fi.cr2 = ctxt->regs->ip; 310 return ES_EXCEPTION; 311 } else if (insn_bytes == -EINVAL) { 312 /* Effective RIP could not be calculated */ 313 ctxt->fi.vector = X86_TRAP_GP; 314 ctxt->fi.error_code = 0; 315 ctxt->fi.cr2 = 0; 316 return ES_EXCEPTION; 317 } 318 319 if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes)) 320 return ES_DECODE_FAILED; 321 322 if (ctxt->insn.immediate.got) 323 return ES_OK; 324 else 325 return ES_DECODE_FAILED; 326 } 327 328 static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt) 329 { 330 char buffer[MAX_INSN_SIZE]; 331 int res, ret; 332 333 res = vc_fetch_insn_kernel(ctxt, buffer); 334 if (res) { 335 ctxt->fi.vector = X86_TRAP_PF; 336 ctxt->fi.error_code = X86_PF_INSTR; 337 ctxt->fi.cr2 = ctxt->regs->ip; 338 return ES_EXCEPTION; 339 } 340 341 ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64); 342 if (ret < 0) 343 return ES_DECODE_FAILED; 344 else 345 return ES_OK; 346 } 347 348 static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) 349 { 350 if (user_mode(ctxt->regs)) 351 return __vc_decode_user_insn(ctxt); 352 else 353 return __vc_decode_kern_insn(ctxt); 354 } 355 356 static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, 357 char *dst, char *buf, size_t size) 358 { 359 unsigned long error_code = X86_PF_PROT | X86_PF_WRITE; 360 361 /* 362 * This function uses __put_user() independent of whether kernel or user 363 * memory is accessed. This works fine because __put_user() does no 364 * sanity checks of the pointer being accessed. All that it does is 365 * to report when the access failed. 366 * 367 * Also, this function runs in atomic context, so __put_user() is not 368 * allowed to sleep. The page-fault handler detects that it is running 369 * in atomic context and will not try to take mmap_sem and handle the 370 * fault, so additional pagefault_enable()/disable() calls are not 371 * needed. 372 * 373 * The access can't be done via copy_to_user() here because 374 * vc_write_mem() must not use string instructions to access unsafe 375 * memory. The reason is that MOVS is emulated by the #VC handler by 376 * splitting the move up into a read and a write and taking a nested #VC 377 * exception on whatever of them is the MMIO access. 
Using string 378 * instructions here would cause infinite nesting. 379 */ 380 switch (size) { 381 case 1: { 382 u8 d1; 383 u8 __user *target = (u8 __user *)dst; 384 385 memcpy(&d1, buf, 1); 386 if (__put_user(d1, target)) 387 goto fault; 388 break; 389 } 390 case 2: { 391 u16 d2; 392 u16 __user *target = (u16 __user *)dst; 393 394 memcpy(&d2, buf, 2); 395 if (__put_user(d2, target)) 396 goto fault; 397 break; 398 } 399 case 4: { 400 u32 d4; 401 u32 __user *target = (u32 __user *)dst; 402 403 memcpy(&d4, buf, 4); 404 if (__put_user(d4, target)) 405 goto fault; 406 break; 407 } 408 case 8: { 409 u64 d8; 410 u64 __user *target = (u64 __user *)dst; 411 412 memcpy(&d8, buf, 8); 413 if (__put_user(d8, target)) 414 goto fault; 415 break; 416 } 417 default: 418 WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); 419 return ES_UNSUPPORTED; 420 } 421 422 return ES_OK; 423 424 fault: 425 if (user_mode(ctxt->regs)) 426 error_code |= X86_PF_USER; 427 428 ctxt->fi.vector = X86_TRAP_PF; 429 ctxt->fi.error_code = error_code; 430 ctxt->fi.cr2 = (unsigned long)dst; 431 432 return ES_EXCEPTION; 433 } 434 435 static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, 436 char *src, char *buf, size_t size) 437 { 438 unsigned long error_code = X86_PF_PROT; 439 440 /* 441 * This function uses __get_user() independent of whether kernel or user 442 * memory is accessed. This works fine because __get_user() does no 443 * sanity checks of the pointer being accessed. All that it does is 444 * to report when the access failed. 445 * 446 * Also, this function runs in atomic context, so __get_user() is not 447 * allowed to sleep. The page-fault handler detects that it is running 448 * in atomic context and will not try to take mmap_sem and handle the 449 * fault, so additional pagefault_enable()/disable() calls are not 450 * needed. 451 * 452 * The access can't be done via copy_from_user() here because 453 * vc_read_mem() must not use string instructions to access unsafe 454 * memory. The reason is that MOVS is emulated by the #VC handler by 455 * splitting the move up into a read and a write and taking a nested #VC 456 * exception on whatever of them is the MMIO access. Using string 457 * instructions here would cause infinite nesting. 
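 *
 * (The read/write split of MOVS mentioned above is implemented in
 * vc_handle_mmio_movs() further down in this file.)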
458 */ 459 switch (size) { 460 case 1: { 461 u8 d1; 462 u8 __user *s = (u8 __user *)src; 463 464 if (__get_user(d1, s)) 465 goto fault; 466 memcpy(buf, &d1, 1); 467 break; 468 } 469 case 2: { 470 u16 d2; 471 u16 __user *s = (u16 __user *)src; 472 473 if (__get_user(d2, s)) 474 goto fault; 475 memcpy(buf, &d2, 2); 476 break; 477 } 478 case 4: { 479 u32 d4; 480 u32 __user *s = (u32 __user *)src; 481 482 if (__get_user(d4, s)) 483 goto fault; 484 memcpy(buf, &d4, 4); 485 break; 486 } 487 case 8: { 488 u64 d8; 489 u64 __user *s = (u64 __user *)src; 490 if (__get_user(d8, s)) 491 goto fault; 492 memcpy(buf, &d8, 8); 493 break; 494 } 495 default: 496 WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); 497 return ES_UNSUPPORTED; 498 } 499 500 return ES_OK; 501 502 fault: 503 if (user_mode(ctxt->regs)) 504 error_code |= X86_PF_USER; 505 506 ctxt->fi.vector = X86_TRAP_PF; 507 ctxt->fi.error_code = error_code; 508 ctxt->fi.cr2 = (unsigned long)src; 509 510 return ES_EXCEPTION; 511 } 512 513 static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, 514 unsigned long vaddr, phys_addr_t *paddr) 515 { 516 unsigned long va = (unsigned long)vaddr; 517 unsigned int level; 518 phys_addr_t pa; 519 pgd_t *pgd; 520 pte_t *pte; 521 522 pgd = __va(read_cr3_pa()); 523 pgd = &pgd[pgd_index(va)]; 524 pte = lookup_address_in_pgd(pgd, va, &level); 525 if (!pte) { 526 ctxt->fi.vector = X86_TRAP_PF; 527 ctxt->fi.cr2 = vaddr; 528 ctxt->fi.error_code = 0; 529 530 if (user_mode(ctxt->regs)) 531 ctxt->fi.error_code |= X86_PF_USER; 532 533 return ES_EXCEPTION; 534 } 535 536 if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC)) 537 /* Emulated MMIO to/from encrypted memory not supported */ 538 return ES_UNSUPPORTED; 539 540 pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; 541 pa |= va & ~page_level_mask(level); 542 543 *paddr = pa; 544 545 return ES_OK; 546 } 547 548 static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) 549 { 550 BUG_ON(size > 4); 551 552 if (user_mode(ctxt->regs)) { 553 struct thread_struct *t = &current->thread; 554 struct io_bitmap *iobm = t->io_bitmap; 555 size_t idx; 556 557 if (!iobm) 558 goto fault; 559 560 for (idx = port; idx < port + size; ++idx) { 561 if (test_bit(idx, iobm->bitmap)) 562 goto fault; 563 } 564 } 565 566 return ES_OK; 567 568 fault: 569 ctxt->fi.vector = X86_TRAP_GP; 570 ctxt->fi.error_code = 0; 571 572 return ES_EXCEPTION; 573 } 574 575 static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) 576 { 577 long error_code = ctxt->fi.error_code; 578 int trapnr = ctxt->fi.vector; 579 580 ctxt->regs->orig_ax = ctxt->fi.error_code; 581 582 switch (trapnr) { 583 case X86_TRAP_GP: 584 exc_general_protection(ctxt->regs, error_code); 585 break; 586 case X86_TRAP_UD: 587 exc_invalid_op(ctxt->regs); 588 break; 589 case X86_TRAP_PF: 590 write_cr2(ctxt->fi.cr2); 591 exc_page_fault(ctxt->regs, error_code); 592 break; 593 case X86_TRAP_AC: 594 exc_alignment_check(ctxt->regs, error_code); 595 break; 596 default: 597 pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n"); 598 BUG(); 599 } 600 } 601 602 /* Include code shared with pre-decompression boot stage */ 603 #include "shared.c" 604 605 static inline struct svsm_ca *svsm_get_caa(void) 606 { 607 /* 608 * Use rIP-relative references when called early in the boot. If 609 * ->use_cas is set, then it is late in the boot and no need 610 * to worry about rIP-relative references.
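 *
 * (RIP_REL_REF() matters because early callers can still be running from
 * the identity mapping, before absolute kernel-symbol addresses are usable.)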
611 */ 612 if (RIP_REL_REF(sev_cfg).use_cas) 613 return this_cpu_read(svsm_caa); 614 else 615 return RIP_REL_REF(boot_svsm_caa); 616 } 617 618 static u64 svsm_get_caa_pa(void) 619 { 620 /* 621 * Use rIP-relative references when called early in the boot. If 622 * ->use_cas is set, then it is late in the boot and no need 623 * to worry about rIP-relative references. 624 */ 625 if (RIP_REL_REF(sev_cfg).use_cas) 626 return this_cpu_read(svsm_caa_pa); 627 else 628 return RIP_REL_REF(boot_svsm_caa_pa); 629 } 630 631 static noinstr void __sev_put_ghcb(struct ghcb_state *state) 632 { 633 struct sev_es_runtime_data *data; 634 struct ghcb *ghcb; 635 636 WARN_ON(!irqs_disabled()); 637 638 data = this_cpu_read(runtime_data); 639 ghcb = &data->ghcb_page; 640 641 if (state->ghcb) { 642 /* Restore GHCB from Backup */ 643 *ghcb = *state->ghcb; 644 data->backup_ghcb_active = false; 645 state->ghcb = NULL; 646 } else { 647 /* 648 * Invalidate the GHCB so a VMGEXIT instruction issued 649 * from userspace won't appear to be valid. 650 */ 651 vc_ghcb_invalidate(ghcb); 652 data->ghcb_active = false; 653 } 654 } 655 656 static int svsm_perform_call_protocol(struct svsm_call *call) 657 { 658 struct ghcb_state state; 659 unsigned long flags; 660 struct ghcb *ghcb; 661 int ret; 662 663 /* 664 * This can be called very early in the boot, use native functions in 665 * order to avoid paravirt issues. 666 */ 667 flags = native_local_irq_save(); 668 669 /* 670 * Use rip-relative references when called early in the boot. If 671 * ghcbs_initialized is set, then it is late in the boot and no need 672 * to worry about rip-relative references in called functions. 673 */ 674 if (RIP_REL_REF(sev_cfg).ghcbs_initialized) 675 ghcb = __sev_get_ghcb(&state); 676 else if (RIP_REL_REF(boot_ghcb)) 677 ghcb = RIP_REL_REF(boot_ghcb); 678 else 679 ghcb = NULL; 680 681 do { 682 ret = ghcb ? 
svsm_perform_ghcb_protocol(ghcb, call) 683 : svsm_perform_msr_protocol(call); 684 } while (ret == -EAGAIN); 685 686 if (RIP_REL_REF(sev_cfg).ghcbs_initialized) 687 __sev_put_ghcb(&state); 688 689 native_local_irq_restore(flags); 690 691 return ret; 692 } 693 694 void noinstr __sev_es_nmi_complete(void) 695 { 696 struct ghcb_state state; 697 struct ghcb *ghcb; 698 699 ghcb = __sev_get_ghcb(&state); 700 701 vc_ghcb_invalidate(ghcb); 702 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE); 703 ghcb_set_sw_exit_info_1(ghcb, 0); 704 ghcb_set_sw_exit_info_2(ghcb, 0); 705 706 sev_es_wr_ghcb_msr(__pa_nodebug(ghcb)); 707 VMGEXIT(); 708 709 __sev_put_ghcb(&state); 710 } 711 712 static u64 __init get_snp_jump_table_addr(void) 713 { 714 struct snp_secrets_page *secrets; 715 void __iomem *mem; 716 u64 addr; 717 718 mem = ioremap_encrypted(secrets_pa, PAGE_SIZE); 719 if (!mem) { 720 pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n"); 721 return 0; 722 } 723 724 secrets = (__force struct snp_secrets_page *)mem; 725 726 addr = secrets->os_area.ap_jump_table_pa; 727 iounmap(mem); 728 729 return addr; 730 } 731 732 static u64 __init get_jump_table_addr(void) 733 { 734 struct ghcb_state state; 735 unsigned long flags; 736 struct ghcb *ghcb; 737 u64 ret = 0; 738 739 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 740 return get_snp_jump_table_addr(); 741 742 local_irq_save(flags); 743 744 ghcb = __sev_get_ghcb(&state); 745 746 vc_ghcb_invalidate(ghcb); 747 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE); 748 ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE); 749 ghcb_set_sw_exit_info_2(ghcb, 0); 750 751 sev_es_wr_ghcb_msr(__pa(ghcb)); 752 VMGEXIT(); 753 754 if (ghcb_sw_exit_info_1_is_valid(ghcb) && 755 ghcb_sw_exit_info_2_is_valid(ghcb)) 756 ret = ghcb->save.sw_exit_info_2; 757 758 __sev_put_ghcb(&state); 759 760 local_irq_restore(flags); 761 762 return ret; 763 } 764 765 static void __head 766 early_set_pages_state(unsigned long vaddr, unsigned long paddr, 767 unsigned long npages, enum psc_op op) 768 { 769 unsigned long paddr_end; 770 u64 val; 771 772 vaddr = vaddr & PAGE_MASK; 773 774 paddr = paddr & PAGE_MASK; 775 paddr_end = paddr + (npages << PAGE_SHIFT); 776 777 while (paddr < paddr_end) { 778 /* Page validation must be rescinded before changing to shared */ 779 if (op == SNP_PAGE_STATE_SHARED) 780 pvalidate_4k_page(vaddr, paddr, false); 781 782 /* 783 * Use the MSR protocol because this function can be called before 784 * the GHCB is established. 785 */ 786 sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); 787 VMGEXIT(); 788 789 val = sev_es_rd_ghcb_msr(); 790 791 if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) 792 goto e_term; 793 794 if (GHCB_MSR_PSC_RESP_VAL(val)) 795 goto e_term; 796 797 /* Page validation must be performed after changing to private */ 798 if (op == SNP_PAGE_STATE_PRIVATE) 799 pvalidate_4k_page(vaddr, paddr, true); 800 801 vaddr += PAGE_SIZE; 802 paddr += PAGE_SIZE; 803 } 804 805 return; 806 807 e_term: 808 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); 809 } 810 811 void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, 812 unsigned long npages) 813 { 814 /* 815 * This can be invoked in early boot while running identity mapped, so 816 * use an open coded check for SNP instead of using cc_platform_has(). 817 * This eliminates worries about jump tables or checking boot_cpu_data 818 * in the cc_platform_has() function. 
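 *
 * early_set_pages_state() above uses the GHCB MSR protocol, which changes
 * one 4K page per VMGEXIT: safe before a GHCB is established, but slow for
 * large ranges.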
819 */ 820 if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) 821 return; 822 823 /* 824 * Ask the hypervisor to mark the memory pages as private in the RMP 825 * table. 826 */ 827 early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE); 828 } 829 830 void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, 831 unsigned long npages) 832 { 833 /* 834 * This can be invoked in early boot while running identity mapped, so 835 * use an open coded check for SNP instead of using cc_platform_has(). 836 * This eliminates worries about jump tables or checking boot_cpu_data 837 * in the cc_platform_has() function. 838 */ 839 if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) 840 return; 841 842 /* Ask hypervisor to mark the memory pages shared in the RMP table. */ 843 early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED); 844 } 845 846 static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, 847 unsigned long vaddr_end, int op) 848 { 849 struct ghcb_state state; 850 bool use_large_entry; 851 struct psc_hdr *hdr; 852 struct psc_entry *e; 853 unsigned long flags; 854 unsigned long pfn; 855 struct ghcb *ghcb; 856 int i; 857 858 hdr = &data->hdr; 859 e = data->entries; 860 861 memset(data, 0, sizeof(*data)); 862 i = 0; 863 864 while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) { 865 hdr->end_entry = i; 866 867 if (is_vmalloc_addr((void *)vaddr)) { 868 pfn = vmalloc_to_pfn((void *)vaddr); 869 use_large_entry = false; 870 } else { 871 pfn = __pa(vaddr) >> PAGE_SHIFT; 872 use_large_entry = true; 873 } 874 875 e->gfn = pfn; 876 e->operation = op; 877 878 if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) && 879 (vaddr_end - vaddr) >= PMD_SIZE) { 880 e->pagesize = RMP_PG_SIZE_2M; 881 vaddr += PMD_SIZE; 882 } else { 883 e->pagesize = RMP_PG_SIZE_4K; 884 vaddr += PAGE_SIZE; 885 } 886 887 e++; 888 i++; 889 } 890 891 /* Page validation must be rescinded before changing to shared */ 892 if (op == SNP_PAGE_STATE_SHARED) 893 pvalidate_pages(data); 894 895 local_irq_save(flags); 896 897 if (sev_cfg.ghcbs_initialized) 898 ghcb = __sev_get_ghcb(&state); 899 else 900 ghcb = boot_ghcb; 901 902 /* Invoke the hypervisor to perform the page state changes */ 903 if (!ghcb || vmgexit_psc(ghcb, data)) 904 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); 905 906 if (sev_cfg.ghcbs_initialized) 907 __sev_put_ghcb(&state); 908 909 local_irq_restore(flags); 910 911 /* Page validation must be performed after changing to private */ 912 if (op == SNP_PAGE_STATE_PRIVATE) 913 pvalidate_pages(data); 914 915 return vaddr; 916 } 917 918 static void set_pages_state(unsigned long vaddr, unsigned long npages, int op) 919 { 920 struct snp_psc_desc desc; 921 unsigned long vaddr_end; 922 923 /* Use the MSR protocol when a GHCB is not available. 
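 * boot_ghcb is NULL until setup_ghcb() publishes it and is cleared again
 * in snp_kexec_finish(), so both cases fall back to early_set_pages_state().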
*/ 924 if (!boot_ghcb) 925 return early_set_pages_state(vaddr, __pa(vaddr), npages, op); 926 927 vaddr = vaddr & PAGE_MASK; 928 vaddr_end = vaddr + (npages << PAGE_SHIFT); 929 930 while (vaddr < vaddr_end) 931 vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op); 932 } 933 934 void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) 935 { 936 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 937 return; 938 939 set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED); 940 } 941 942 void snp_set_memory_private(unsigned long vaddr, unsigned long npages) 943 { 944 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 945 return; 946 947 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); 948 } 949 950 void snp_accept_memory(phys_addr_t start, phys_addr_t end) 951 { 952 unsigned long vaddr, npages; 953 954 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 955 return; 956 957 vaddr = (unsigned long)__va(start); 958 npages = (end - start) >> PAGE_SHIFT; 959 960 set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); 961 } 962 963 static void set_pte_enc(pte_t *kpte, int level, void *va) 964 { 965 struct pte_enc_desc d = { 966 .kpte = kpte, 967 .pte_level = level, 968 .va = va, 969 .encrypt = true 970 }; 971 972 prepare_pte_enc(&d); 973 set_pte_enc_mask(kpte, d.pfn, d.new_pgprot); 974 } 975 976 static void unshare_all_memory(void) 977 { 978 unsigned long addr, end, size, ghcb; 979 struct sev_es_runtime_data *data; 980 unsigned int npages, level; 981 bool skipped_addr; 982 pte_t *pte; 983 int cpu; 984 985 /* Unshare the direct mapping. */ 986 addr = PAGE_OFFSET; 987 end = PAGE_OFFSET + get_max_mapped(); 988 989 while (addr < end) { 990 pte = lookup_address(addr, &level); 991 size = page_level_size(level); 992 npages = size / PAGE_SIZE; 993 skipped_addr = false; 994 995 if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) { 996 addr += size; 997 continue; 998 } 999 1000 /* 1001 * Ensure that all the per-CPU GHCBs are made private at the 1002 * end of the unsharing loop so that the switch to the slower 1003 * MSR protocol happens last. 1004 */ 1005 for_each_possible_cpu(cpu) { 1006 data = per_cpu(runtime_data, cpu); 1007 ghcb = (unsigned long)&data->ghcb_page; 1008 1009 if (addr <= ghcb && ghcb <= addr + size) { 1010 skipped_addr = true; 1011 break; 1012 } 1013 } 1014 1015 if (!skipped_addr) { 1016 set_pte_enc(pte, level, (void *)addr); 1017 snp_set_memory_private(addr, npages); 1018 } 1019 addr += size; 1020 } 1021 1022 /* Unshare all bss decrypted memory. */ 1023 addr = (unsigned long)__start_bss_decrypted; 1024 end = (unsigned long)__start_bss_decrypted_unused; 1025 npages = (end - addr) >> PAGE_SHIFT; 1026 1027 for (; addr < end; addr += PAGE_SIZE) { 1028 pte = lookup_address(addr, &level); 1029 if (!pte || !pte_decrypted(*pte) || pte_none(*pte)) 1030 continue; 1031 1032 set_pte_enc(pte, level, (void *)addr); 1033 } 1034 addr = (unsigned long)__start_bss_decrypted; 1035 snp_set_memory_private(addr, npages); 1036 1037 __flush_tlb_all(); 1038 } 1039 1040 /* Stop new private<->shared conversions */ 1041 void snp_kexec_begin(void) 1042 { 1043 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1044 return; 1045 1046 if (!IS_ENABLED(CONFIG_KEXEC_CORE)) 1047 return; 1048 1049 /* 1050 * Crash kernel ends up here with interrupts disabled: can't wait for 1051 * conversions to finish. 1052 * 1053 * If race happened, just report and proceed. 
1054 */ 1055 if (!set_memory_enc_stop_conversion()) 1056 pr_warn("Failed to stop shared<->private conversions\n"); 1057 } 1058 1059 void snp_kexec_finish(void) 1060 { 1061 struct sev_es_runtime_data *data; 1062 unsigned int level, cpu; 1063 unsigned long size; 1064 struct ghcb *ghcb; 1065 pte_t *pte; 1066 1067 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1068 return; 1069 1070 if (!IS_ENABLED(CONFIG_KEXEC_CORE)) 1071 return; 1072 1073 unshare_all_memory(); 1074 1075 /* 1076 * Switch to using the MSR protocol to change per-CPU GHCBs to 1077 * private. All the per-CPU GHCBs have been switched back to private, 1078 * so can't do any more GHCB calls to the hypervisor beyond this point 1079 * until the kexec'ed kernel starts running. 1080 */ 1081 boot_ghcb = NULL; 1082 sev_cfg.ghcbs_initialized = false; 1083 1084 for_each_possible_cpu(cpu) { 1085 data = per_cpu(runtime_data, cpu); 1086 ghcb = &data->ghcb_page; 1087 pte = lookup_address((unsigned long)ghcb, &level); 1088 size = page_level_size(level); 1089 set_pte_enc(pte, level, (void *)ghcb); 1090 snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE)); 1091 } 1092 } 1093 1094 static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa) 1095 { 1096 int ret; 1097 1098 if (snp_vmpl) { 1099 struct svsm_call call = {}; 1100 unsigned long flags; 1101 1102 local_irq_save(flags); 1103 1104 call.caa = this_cpu_read(svsm_caa); 1105 call.rcx = __pa(va); 1106 1107 if (make_vmsa) { 1108 /* Protocol 0, Call ID 2 */ 1109 call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU); 1110 call.rdx = __pa(caa); 1111 call.r8 = apic_id; 1112 } else { 1113 /* Protocol 0, Call ID 3 */ 1114 call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU); 1115 } 1116 1117 ret = svsm_perform_call_protocol(&call); 1118 1119 local_irq_restore(flags); 1120 } else { 1121 /* 1122 * If the kernel runs at VMPL0, it can change the VMSA 1123 * bit for a page using the RMPADJUST instruction. 1124 * However, for the instruction to succeed it must 1125 * target the permissions of a lesser privileged (higher 1126 * numbered) VMPL level, so use VMPL1. 1127 */ 1128 u64 attrs = 1; 1129 1130 if (make_vmsa) 1131 attrs |= RMPADJUST_VMSA_PAGE_BIT; 1132 1133 ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); 1134 } 1135 1136 return ret; 1137 } 1138 1139 #define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK) 1140 #define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK) 1141 #define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK) 1142 1143 #define INIT_LDTR_ATTRIBS (SVM_SELECTOR_P_MASK | 2) 1144 #define INIT_TR_ATTRIBS (SVM_SELECTOR_P_MASK | 3) 1145 1146 static void *snp_alloc_vmsa_page(int cpu) 1147 { 1148 struct page *p; 1149 1150 /* 1151 * Allocate VMSA page to work around the SNP erratum where the CPU will 1152 * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB) 1153 * collides with the RMP entry of VMSA page. The recommended workaround 1154 * is to not use a large page. 1155 * 1156 * Allocate an 8k page which is also 8k-aligned. 1157 */ 1158 p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1); 1159 if (!p) 1160 return NULL; 1161 1162 split_page(p, 1); 1163 1164 /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. 
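 * The second 4k page (p + 1) of an 8k-aligned order-1 allocation can never
 * itself be 2M/1G aligned, which is what the workaround needs.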
*/ 1165 __free_page(p); 1166 1167 return page_address(p + 1); 1168 } 1169 1170 static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id) 1171 { 1172 int err; 1173 1174 err = snp_set_vmsa(vmsa, NULL, apic_id, false); 1175 if (err) 1176 pr_err("clear VMSA page failed (%u), leaking page\n", err); 1177 else 1178 free_page((unsigned long)vmsa); 1179 } 1180 1181 static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) 1182 { 1183 struct sev_es_save_area *cur_vmsa, *vmsa; 1184 struct ghcb_state state; 1185 struct svsm_ca *caa; 1186 unsigned long flags; 1187 struct ghcb *ghcb; 1188 u8 sipi_vector; 1189 int cpu, ret; 1190 u64 cr4; 1191 1192 /* 1193 * The hypervisor SNP feature support check has happened earlier, just check 1194 * the AP_CREATION one here. 1195 */ 1196 if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION)) 1197 return -EOPNOTSUPP; 1198 1199 /* 1200 * Verify the desired start IP against the known trampoline start IP 1201 * to catch any future new trampolines that may be introduced that 1202 * would require a new protected guest entry point. 1203 */ 1204 if (WARN_ONCE(start_ip != real_mode_header->trampoline_start, 1205 "Unsupported SNP start_ip: %lx\n", start_ip)) 1206 return -EINVAL; 1207 1208 /* Override start_ip with known protected guest start IP */ 1209 start_ip = real_mode_header->sev_es_trampoline_start; 1210 1211 /* Find the logical CPU for the APIC ID */ 1212 for_each_present_cpu(cpu) { 1213 if (arch_match_cpu_phys_id(cpu, apic_id)) 1214 break; 1215 } 1216 if (cpu >= nr_cpu_ids) 1217 return -EINVAL; 1218 1219 cur_vmsa = per_cpu(sev_vmsa, cpu); 1220 1221 /* 1222 * A new VMSA is created each time because there is no guarantee that 1223 * the current VMSA is the kernels or that the vCPU is not running. If 1224 * an attempt was done to use the current VMSA with a running vCPU, a 1225 * #VMEXIT of that vCPU would wipe out all of the settings being done 1226 * here. 1227 */ 1228 vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page(cpu); 1229 if (!vmsa) 1230 return -ENOMEM; 1231 1232 /* If an SVSM is present, the SVSM per-CPU CAA will be !NULL */ 1233 caa = per_cpu(svsm_caa, cpu); 1234 1235 /* CR4 should maintain the MCE value */ 1236 cr4 = native_read_cr4() & X86_CR4_MCE; 1237 1238 /* Set the CS value based on the start_ip converted to a SIPI vector */ 1239 sipi_vector = (start_ip >> 12); 1240 vmsa->cs.base = sipi_vector << 12; 1241 vmsa->cs.limit = AP_INIT_CS_LIMIT; 1242 vmsa->cs.attrib = INIT_CS_ATTRIBS; 1243 vmsa->cs.selector = sipi_vector << 8; 1244 1245 /* Set the RIP value based on start_ip */ 1246 vmsa->rip = start_ip & 0xfff; 1247 1248 /* Set AP INIT defaults as documented in the APM */ 1249 vmsa->ds.limit = AP_INIT_DS_LIMIT; 1250 vmsa->ds.attrib = INIT_DS_ATTRIBS; 1251 vmsa->es = vmsa->ds; 1252 vmsa->fs = vmsa->ds; 1253 vmsa->gs = vmsa->ds; 1254 vmsa->ss = vmsa->ds; 1255 1256 vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT; 1257 vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT; 1258 vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS; 1259 vmsa->idtr.limit = AP_INIT_IDTR_LIMIT; 1260 vmsa->tr.limit = AP_INIT_TR_LIMIT; 1261 vmsa->tr.attrib = INIT_TR_ATTRIBS; 1262 1263 vmsa->cr4 = cr4; 1264 vmsa->cr0 = AP_INIT_CR0_DEFAULT; 1265 vmsa->dr7 = DR7_RESET_VALUE; 1266 vmsa->dr6 = AP_INIT_DR6_DEFAULT; 1267 vmsa->rflags = AP_INIT_RFLAGS_DEFAULT; 1268 vmsa->g_pat = AP_INIT_GPAT_DEFAULT; 1269 vmsa->xcr0 = AP_INIT_XCR0_DEFAULT; 1270 vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT; 1271 vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT; 1272 vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT; 1273 1274 /* SVME must be set. 
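 * (Without EFER.SVME in the VMSA, the hypervisor's VMRUN of this vCPU is
 * expected to fail, so the AP would never come online.)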
*/ 1275 vmsa->efer = EFER_SVME; 1276 1277 /* 1278 * Set the SNP-specific fields for this VMSA: 1279 * VMPL level 1280 * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits) 1281 */ 1282 vmsa->vmpl = snp_vmpl; 1283 vmsa->sev_features = sev_status >> 2; 1284 1285 /* Populate AP's TSC scale/offset to get accurate TSC values. */ 1286 if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) { 1287 vmsa->tsc_scale = snp_tsc_scale; 1288 vmsa->tsc_offset = snp_tsc_offset; 1289 } 1290 1291 /* Switch the page over to a VMSA page now that it is initialized */ 1292 ret = snp_set_vmsa(vmsa, caa, apic_id, true); 1293 if (ret) { 1294 pr_err("set VMSA page failed (%u)\n", ret); 1295 free_page((unsigned long)vmsa); 1296 1297 return -EINVAL; 1298 } 1299 1300 /* Issue VMGEXIT AP Creation NAE event */ 1301 local_irq_save(flags); 1302 1303 ghcb = __sev_get_ghcb(&state); 1304 1305 vc_ghcb_invalidate(ghcb); 1306 ghcb_set_rax(ghcb, vmsa->sev_features); 1307 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION); 1308 ghcb_set_sw_exit_info_1(ghcb, 1309 ((u64)apic_id << 32) | 1310 ((u64)snp_vmpl << 16) | 1311 SVM_VMGEXIT_AP_CREATE); 1312 ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa)); 1313 1314 sev_es_wr_ghcb_msr(__pa(ghcb)); 1315 VMGEXIT(); 1316 1317 if (!ghcb_sw_exit_info_1_is_valid(ghcb) || 1318 lower_32_bits(ghcb->save.sw_exit_info_1)) { 1319 pr_err("SNP AP Creation error\n"); 1320 ret = -EINVAL; 1321 } 1322 1323 __sev_put_ghcb(&state); 1324 1325 local_irq_restore(flags); 1326 1327 /* Perform cleanup if there was an error */ 1328 if (ret) { 1329 snp_cleanup_vmsa(vmsa, apic_id); 1330 vmsa = NULL; 1331 } 1332 1333 /* Free up any previous VMSA page */ 1334 if (cur_vmsa) 1335 snp_cleanup_vmsa(cur_vmsa, apic_id); 1336 1337 /* Record the current VMSA page */ 1338 per_cpu(sev_vmsa, cpu) = vmsa; 1339 1340 return ret; 1341 } 1342 1343 void __init snp_set_wakeup_secondary_cpu(void) 1344 { 1345 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1346 return; 1347 1348 /* 1349 * Always set this override if SNP is enabled. This makes it the 1350 * required method to start APs under SNP. If the hypervisor does 1351 * not support AP creation, then no APs will be started. 1352 */ 1353 apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit); 1354 } 1355 1356 int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh) 1357 { 1358 u16 startup_cs, startup_ip; 1359 phys_addr_t jump_table_pa; 1360 u64 jump_table_addr; 1361 u16 __iomem *jump_table; 1362 1363 jump_table_addr = get_jump_table_addr(); 1364 1365 /* On UP guests there is no jump table so this is not a failure */ 1366 if (!jump_table_addr) 1367 return 0; 1368 1369 /* Check if AP Jump Table is page-aligned */ 1370 if (jump_table_addr & ~PAGE_MASK) 1371 return -EINVAL; 1372 1373 jump_table_pa = jump_table_addr & PAGE_MASK; 1374 1375 startup_cs = (u16)(rmh->trampoline_start >> 4); 1376 startup_ip = (u16)(rmh->sev_es_trampoline_start - 1377 rmh->trampoline_start); 1378 1379 jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE); 1380 if (!jump_table) 1381 return -EIO; 1382 1383 writew(startup_ip, &jump_table[0]); 1384 writew(startup_cs, &jump_table[1]); 1385 1386 iounmap(jump_table); 1387 1388 return 0; 1389 } 1390 1391 /* 1392 * This is needed by the OVMF UEFI firmware which will use whatever it finds in 1393 * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu 1394 * runtime GHCBs used by the kernel are also mapped in the EFI page-table. 
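 *
 * The mapping created below is 1:1 (virtual address == physical address),
 * presumably so that the physical GHCB address found in the MSR resolves
 * directly under the EFI page-table as well.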
1395 */ 1396 int __init sev_es_efi_map_ghcbs(pgd_t *pgd) 1397 { 1398 struct sev_es_runtime_data *data; 1399 unsigned long address, pflags; 1400 int cpu; 1401 u64 pfn; 1402 1403 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) 1404 return 0; 1405 1406 pflags = _PAGE_NX | _PAGE_RW; 1407 1408 for_each_possible_cpu(cpu) { 1409 data = per_cpu(runtime_data, cpu); 1410 1411 address = __pa(&data->ghcb_page); 1412 pfn = address >> PAGE_SHIFT; 1413 1414 if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags)) 1415 return 1; 1416 } 1417 1418 return 0; 1419 } 1420 1421 /* Writes to the SVSM CAA MSR are ignored */ 1422 static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write) 1423 { 1424 if (write) 1425 return ES_OK; 1426 1427 regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa)); 1428 regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa)); 1429 1430 return ES_OK; 1431 } 1432 1433 /* 1434 * TSC related accesses should not exit to the hypervisor when a guest is 1435 * executing with Secure TSC enabled, so special handling is required for 1436 * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ. 1437 */ 1438 static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write) 1439 { 1440 u64 tsc; 1441 1442 /* 1443 * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled. 1444 * Terminate the SNP guest when the interception is enabled. 1445 */ 1446 if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ) 1447 return ES_VMM_ERROR; 1448 1449 /* 1450 * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC 1451 * to return undefined values, so ignore all writes. 1452 * 1453 * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use 1454 * the value returned by rdtsc_ordered(). 1455 */ 1456 if (write) { 1457 WARN_ONCE(1, "TSC MSR writes are verboten!\n"); 1458 return ES_OK; 1459 } 1460 1461 tsc = rdtsc_ordered(); 1462 regs->ax = lower_32_bits(tsc); 1463 regs->dx = upper_32_bits(tsc); 1464 1465 return ES_OK; 1466 } 1467 1468 static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) 1469 { 1470 struct pt_regs *regs = ctxt->regs; 1471 enum es_result ret; 1472 bool write; 1473 1474 /* Is it a WRMSR? */ 1475 write = ctxt->insn.opcode.bytes[1] == 0x30; 1476 1477 switch (regs->cx) { 1478 case MSR_SVSM_CAA: 1479 return __vc_handle_msr_caa(regs, write); 1480 case MSR_IA32_TSC: 1481 case MSR_AMD64_GUEST_TSC_FREQ: 1482 if (sev_status & MSR_AMD64_SNP_SECURE_TSC) 1483 return __vc_handle_secure_tsc_msrs(regs, write); 1484 break; 1485 default: 1486 break; 1487 } 1488 1489 ghcb_set_rcx(ghcb, regs->cx); 1490 if (write) { 1491 ghcb_set_rax(ghcb, regs->ax); 1492 ghcb_set_rdx(ghcb, regs->dx); 1493 } 1494 1495 ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0); 1496 1497 if ((ret == ES_OK) && !write) { 1498 regs->ax = ghcb->save.rax; 1499 regs->dx = ghcb->save.rdx; 1500 } 1501 1502 return ret; 1503 } 1504 1505 static void snp_register_per_cpu_ghcb(void) 1506 { 1507 struct sev_es_runtime_data *data; 1508 struct ghcb *ghcb; 1509 1510 data = this_cpu_read(runtime_data); 1511 ghcb = &data->ghcb_page; 1512 1513 snp_register_ghcb_early(__pa(ghcb)); 1514 } 1515 1516 void setup_ghcb(void) 1517 { 1518 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) 1519 return; 1520 1521 /* 1522 * Check whether the runtime #VC exception handler is active. It uses 1523 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling(). 1524 * 1525 * If SNP is active, register the per-CPU GHCB page so that the runtime 1526 * exception handler can use it. 
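 *
 * Otherwise this is the early BSP path: negotiate the GHCB protocol,
 * publish boot_ghcb and, for SNP, register its GPA with the hypervisor.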
1527 */ 1528 if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) { 1529 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1530 snp_register_per_cpu_ghcb(); 1531 1532 sev_cfg.ghcbs_initialized = true; 1533 1534 return; 1535 } 1536 1537 /* 1538 * Make sure the hypervisor talks a supported protocol. 1539 * This gets called only in the BSP boot phase. 1540 */ 1541 if (!sev_es_negotiate_protocol()) 1542 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 1543 1544 /* 1545 * Clear the boot_ghcb. The first exception comes in before the bss 1546 * section is cleared. 1547 */ 1548 memset(&boot_ghcb_page, 0, PAGE_SIZE); 1549 1550 /* Alright - Make the boot-ghcb public */ 1551 boot_ghcb = &boot_ghcb_page; 1552 1553 /* SNP guest requires that GHCB GPA must be registered. */ 1554 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 1555 snp_register_ghcb_early(__pa(&boot_ghcb_page)); 1556 } 1557 1558 #ifdef CONFIG_HOTPLUG_CPU 1559 static void sev_es_ap_hlt_loop(void) 1560 { 1561 struct ghcb_state state; 1562 struct ghcb *ghcb; 1563 1564 ghcb = __sev_get_ghcb(&state); 1565 1566 while (true) { 1567 vc_ghcb_invalidate(ghcb); 1568 ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP); 1569 ghcb_set_sw_exit_info_1(ghcb, 0); 1570 ghcb_set_sw_exit_info_2(ghcb, 0); 1571 1572 sev_es_wr_ghcb_msr(__pa(ghcb)); 1573 VMGEXIT(); 1574 1575 /* Wakeup signal? */ 1576 if (ghcb_sw_exit_info_2_is_valid(ghcb) && 1577 ghcb->save.sw_exit_info_2) 1578 break; 1579 } 1580 1581 __sev_put_ghcb(&state); 1582 } 1583 1584 /* 1585 * Play_dead handler when running under SEV-ES. This is needed because 1586 * the hypervisor can't deliver an SIPI request to restart the AP. 1587 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the 1588 * hypervisor wakes it up again. 1589 */ 1590 static void sev_es_play_dead(void) 1591 { 1592 play_dead_common(); 1593 1594 /* IRQs now disabled */ 1595 1596 sev_es_ap_hlt_loop(); 1597 1598 /* 1599 * If we get here, the VCPU was woken up again. Jump to CPU 1600 * startup code to get it back online. 
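 *
 * (The wakeup is signalled by the hypervisor through sw_exit_info_2 of
 * the AP_HLT_LOOP VMGEXIT, see sev_es_ap_hlt_loop() above.)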
1601 */ 1602 soft_restart_cpu(); 1603 } 1604 #else /* CONFIG_HOTPLUG_CPU */ 1605 #define sev_es_play_dead native_play_dead 1606 #endif /* CONFIG_HOTPLUG_CPU */ 1607 1608 #ifdef CONFIG_SMP 1609 static void __init sev_es_setup_play_dead(void) 1610 { 1611 smp_ops.play_dead = sev_es_play_dead; 1612 } 1613 #else 1614 static inline void sev_es_setup_play_dead(void) { } 1615 #endif 1616 1617 static void __init alloc_runtime_data(int cpu) 1618 { 1619 struct sev_es_runtime_data *data; 1620 1621 data = memblock_alloc_node(sizeof(*data), PAGE_SIZE, cpu_to_node(cpu)); 1622 if (!data) 1623 panic("Can't allocate SEV-ES runtime data"); 1624 1625 per_cpu(runtime_data, cpu) = data; 1626 1627 if (snp_vmpl) { 1628 struct svsm_ca *caa; 1629 1630 /* Allocate the SVSM CA page if an SVSM is present */ 1631 caa = memblock_alloc_or_panic(sizeof(*caa), PAGE_SIZE); 1632 1633 per_cpu(svsm_caa, cpu) = caa; 1634 per_cpu(svsm_caa_pa, cpu) = __pa(caa); 1635 } 1636 } 1637 1638 static void __init init_ghcb(int cpu) 1639 { 1640 struct sev_es_runtime_data *data; 1641 int err; 1642 1643 data = per_cpu(runtime_data, cpu); 1644 1645 err = early_set_memory_decrypted((unsigned long)&data->ghcb_page, 1646 sizeof(data->ghcb_page)); 1647 if (err) 1648 panic("Can't map GHCBs unencrypted"); 1649 1650 memset(&data->ghcb_page, 0, sizeof(data->ghcb_page)); 1651 1652 data->ghcb_active = false; 1653 data->backup_ghcb_active = false; 1654 } 1655 1656 void __init sev_es_init_vc_handling(void) 1657 { 1658 int cpu; 1659 1660 BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE); 1661 1662 if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) 1663 return; 1664 1665 if (!sev_es_check_cpu_features()) 1666 panic("SEV-ES CPU Features missing"); 1667 1668 /* 1669 * SNP is supported in v2 of the GHCB spec which mandates support for HV 1670 * features. 
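 *
 * (get_hv_features() uses the GHCB MSR protocol, so it can run before
 * the per-CPU GHCB pages are allocated below.)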
1671 */ 1672 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) { 1673 sev_hv_features = get_hv_features(); 1674 1675 if (!(sev_hv_features & GHCB_HV_FT_SNP)) 1676 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); 1677 } 1678 1679 /* Initialize per-cpu GHCB pages */ 1680 for_each_possible_cpu(cpu) { 1681 alloc_runtime_data(cpu); 1682 init_ghcb(cpu); 1683 } 1684 1685 /* If running under an SVSM, switch to the per-cpu CA */ 1686 if (snp_vmpl) { 1687 struct svsm_call call = {}; 1688 unsigned long flags; 1689 int ret; 1690 1691 local_irq_save(flags); 1692 1693 /* 1694 * SVSM_CORE_REMAP_CA call: 1695 * RAX = 0 (Protocol=0, CallID=0) 1696 * RCX = New CA GPA 1697 */ 1698 call.caa = svsm_get_caa(); 1699 call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); 1700 call.rcx = this_cpu_read(svsm_caa_pa); 1701 ret = svsm_perform_call_protocol(&call); 1702 if (ret) 1703 panic("Can't remap the SVSM CA, ret=%d, rax_out=0x%llx\n", 1704 ret, call.rax_out); 1705 1706 sev_cfg.use_cas = true; 1707 1708 local_irq_restore(flags); 1709 } 1710 1711 sev_es_setup_play_dead(); 1712 1713 /* Secondary CPUs use the runtime #VC handler */ 1714 initial_vc_handler = (unsigned long)kernel_exc_vmm_communication; 1715 } 1716 1717 static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) 1718 { 1719 int trapnr = ctxt->fi.vector; 1720 1721 if (trapnr == X86_TRAP_PF) 1722 native_write_cr2(ctxt->fi.cr2); 1723 1724 ctxt->regs->orig_ax = ctxt->fi.error_code; 1725 do_early_exception(ctxt->regs, trapnr); 1726 } 1727 1728 static long *vc_insn_get_rm(struct es_em_ctxt *ctxt) 1729 { 1730 long *reg_array; 1731 int offset; 1732 1733 reg_array = (long *)ctxt->regs; 1734 offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs); 1735 1736 if (offset < 0) 1737 return NULL; 1738 1739 offset /= sizeof(long); 1740 1741 return reg_array + offset; 1742 } 1743 static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, 1744 unsigned int bytes, bool read) 1745 { 1746 u64 exit_code, exit_info_1, exit_info_2; 1747 unsigned long ghcb_pa = __pa(ghcb); 1748 enum es_result res; 1749 phys_addr_t paddr; 1750 void __user *ref; 1751 1752 ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs); 1753 if (ref == (void __user *)-1L) 1754 return ES_UNSUPPORTED; 1755 1756 exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; 1757 1758 res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); 1759 if (res != ES_OK) { 1760 if (res == ES_EXCEPTION && !read) 1761 ctxt->fi.error_code |= X86_PF_WRITE; 1762 1763 return res; 1764 } 1765 1766 exit_info_1 = paddr; 1767 /* Can never be greater than 8 */ 1768 exit_info_2 = bytes; 1769 1770 ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer)); 1771 1772 return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2); 1773 } 1774 1775 /* 1776 * The MOVS instruction has two memory operands, which raises the 1777 * problem that it is not known whether the access to the source or the 1778 * destination caused the #VC exception (and hence whether an MMIO read 1779 * or write operation needs to be emulated). 1780 * 1781 * Instead of playing games with walking page-tables and trying to guess 1782 * whether the source or destination is an MMIO range, split the move 1783 * into two operations, a read and a write with only one memory operand. 1784 * This will cause a nested #VC exception on the MMIO address which can 1785 * then be handled. 
1786 * 1787 * This implementation has the benefit that it also supports MOVS where 1788 * source _and_ destination are MMIO regions. 1789 * 1790 * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a 1791 * rare operation. If it turns out to be a performance problem the split 1792 * operations can be moved to memcpy_fromio() and memcpy_toio(). 1793 */ 1794 static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt, 1795 unsigned int bytes) 1796 { 1797 unsigned long ds_base, es_base; 1798 unsigned char *src, *dst; 1799 unsigned char buffer[8]; 1800 enum es_result ret; 1801 bool rep; 1802 int off; 1803 1804 ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS); 1805 es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); 1806 1807 if (ds_base == -1L || es_base == -1L) { 1808 ctxt->fi.vector = X86_TRAP_GP; 1809 ctxt->fi.error_code = 0; 1810 return ES_EXCEPTION; 1811 } 1812 1813 src = ds_base + (unsigned char *)ctxt->regs->si; 1814 dst = es_base + (unsigned char *)ctxt->regs->di; 1815 1816 ret = vc_read_mem(ctxt, src, buffer, bytes); 1817 if (ret != ES_OK) 1818 return ret; 1819 1820 ret = vc_write_mem(ctxt, dst, buffer, bytes); 1821 if (ret != ES_OK) 1822 return ret; 1823 1824 if (ctxt->regs->flags & X86_EFLAGS_DF) 1825 off = -bytes; 1826 else 1827 off = bytes; 1828 1829 ctxt->regs->si += off; 1830 ctxt->regs->di += off; 1831 1832 rep = insn_has_rep_prefix(&ctxt->insn); 1833 if (rep) 1834 ctxt->regs->cx -= 1; 1835 1836 if (!rep || ctxt->regs->cx == 0) 1837 return ES_OK; 1838 else 1839 return ES_RETRY; 1840 } 1841 1842 static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) 1843 { 1844 struct insn *insn = &ctxt->insn; 1845 enum insn_mmio_type mmio; 1846 unsigned int bytes = 0; 1847 enum es_result ret; 1848 u8 sign_byte; 1849 long *reg_data; 1850 1851 mmio = insn_decode_mmio(insn, &bytes); 1852 if (mmio == INSN_MMIO_DECODE_FAILED) 1853 return ES_DECODE_FAILED; 1854 1855 if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) { 1856 reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs); 1857 if (!reg_data) 1858 return ES_DECODE_FAILED; 1859 } 1860 1861 if (user_mode(ctxt->regs)) 1862 return ES_UNSUPPORTED; 1863 1864 switch (mmio) { 1865 case INSN_MMIO_WRITE: 1866 memcpy(ghcb->shared_buffer, reg_data, bytes); 1867 ret = vc_do_mmio(ghcb, ctxt, bytes, false); 1868 break; 1869 case INSN_MMIO_WRITE_IMM: 1870 memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes); 1871 ret = vc_do_mmio(ghcb, ctxt, bytes, false); 1872 break; 1873 case INSN_MMIO_READ: 1874 ret = vc_do_mmio(ghcb, ctxt, bytes, true); 1875 if (ret) 1876 break; 1877 1878 /* Zero-extend for 32-bit operation */ 1879 if (bytes == 4) 1880 *reg_data = 0; 1881 1882 memcpy(reg_data, ghcb->shared_buffer, bytes); 1883 break; 1884 case INSN_MMIO_READ_ZERO_EXTEND: 1885 ret = vc_do_mmio(ghcb, ctxt, bytes, true); 1886 if (ret) 1887 break; 1888 1889 /* Zero extend based on operand size */ 1890 memset(reg_data, 0, insn->opnd_bytes); 1891 memcpy(reg_data, ghcb->shared_buffer, bytes); 1892 break; 1893 case INSN_MMIO_READ_SIGN_EXTEND: 1894 ret = vc_do_mmio(ghcb, ctxt, bytes, true); 1895 if (ret) 1896 break; 1897 1898 if (bytes == 1) { 1899 u8 *val = (u8 *)ghcb->shared_buffer; 1900 1901 sign_byte = (*val & 0x80) ? 0xff : 0x00; 1902 } else { 1903 u16 *val = (u16 *)ghcb->shared_buffer; 1904 1905 sign_byte = (*val & 0x8000) ? 
0xff : 0x00; 1906 } 1907 1908 /* Sign extend based on operand size */ 1909 memset(reg_data, sign_byte, insn->opnd_bytes); 1910 memcpy(reg_data, ghcb->shared_buffer, bytes); 1911 break; 1912 case INSN_MMIO_MOVS: 1913 ret = vc_handle_mmio_movs(ctxt, bytes); 1914 break; 1915 default: 1916 ret = ES_UNSUPPORTED; 1917 break; 1918 } 1919 1920 return ret; 1921 } 1922 1923 static enum es_result vc_handle_dr7_write(struct ghcb *ghcb, 1924 struct es_em_ctxt *ctxt) 1925 { 1926 struct sev_es_runtime_data *data = this_cpu_read(runtime_data); 1927 long val, *reg = vc_insn_get_rm(ctxt); 1928 enum es_result ret; 1929 1930 if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP) 1931 return ES_VMM_ERROR; 1932 1933 if (!reg) 1934 return ES_DECODE_FAILED; 1935 1936 val = *reg; 1937 1938 /* Upper 32 bits must be written as zeroes */ 1939 if (val >> 32) { 1940 ctxt->fi.vector = X86_TRAP_GP; 1941 ctxt->fi.error_code = 0; 1942 return ES_EXCEPTION; 1943 } 1944 1945 /* Clear out other reserved bits and set bit 10 */ 1946 val = (val & 0xffff23ffL) | BIT(10); 1947 1948 /* Early non-zero writes to DR7 are not supported */ 1949 if (!data && (val & ~DR7_RESET_VALUE)) 1950 return ES_UNSUPPORTED; 1951 1952 /* Using a value of 0 for ExitInfo1 means RAX holds the value */ 1953 ghcb_set_rax(ghcb, val); 1954 ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0); 1955 if (ret != ES_OK) 1956 return ret; 1957 1958 if (data) 1959 data->dr7 = val; 1960 1961 return ES_OK; 1962 } 1963 1964 static enum es_result vc_handle_dr7_read(struct ghcb *ghcb, 1965 struct es_em_ctxt *ctxt) 1966 { 1967 struct sev_es_runtime_data *data = this_cpu_read(runtime_data); 1968 long *reg = vc_insn_get_rm(ctxt); 1969 1970 if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP) 1971 return ES_VMM_ERROR; 1972 1973 if (!reg) 1974 return ES_DECODE_FAILED; 1975 1976 if (data) 1977 *reg = data->dr7; 1978 else 1979 *reg = DR7_RESET_VALUE; 1980 1981 return ES_OK; 1982 } 1983 1984 static enum es_result vc_handle_wbinvd(struct ghcb *ghcb, 1985 struct es_em_ctxt *ctxt) 1986 { 1987 return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0); 1988 } 1989 1990 static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt) 1991 { 1992 enum es_result ret; 1993 1994 ghcb_set_rcx(ghcb, ctxt->regs->cx); 1995 1996 ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0); 1997 if (ret != ES_OK) 1998 return ret; 1999 2000 if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb))) 2001 return ES_VMM_ERROR; 2002 2003 ctxt->regs->ax = ghcb->save.rax; 2004 ctxt->regs->dx = ghcb->save.rdx; 2005 2006 return ES_OK; 2007 } 2008 2009 static enum es_result vc_handle_monitor(struct ghcb *ghcb, 2010 struct es_em_ctxt *ctxt) 2011 { 2012 /* 2013 * Treat it as a NOP and do not leak a physical address to the 2014 * hypervisor. 2015 */ 2016 return ES_OK; 2017 } 2018 2019 static enum es_result vc_handle_mwait(struct ghcb *ghcb, 2020 struct es_em_ctxt *ctxt) 2021 { 2022 /* Treat the same as MONITOR/MONITORX */ 2023 return ES_OK; 2024 } 2025 2026 static enum es_result vc_handle_vmmcall(struct ghcb *ghcb, 2027 struct es_em_ctxt *ctxt) 2028 { 2029 enum es_result ret; 2030 2031 ghcb_set_rax(ghcb, ctxt->regs->ax); 2032 ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 
3 : 0); 2033 2034 if (x86_platform.hyper.sev_es_hcall_prepare) 2035 x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs); 2036 2037 ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0); 2038 if (ret != ES_OK) 2039 return ret; 2040 2041 if (!ghcb_rax_is_valid(ghcb)) 2042 return ES_VMM_ERROR; 2043 2044 ctxt->regs->ax = ghcb->save.rax; 2045 2046 /* 2047 * Call sev_es_hcall_finish() after regs->ax is already set. 2048 * This allows the hypervisor handler to overwrite it again if 2049 * necessary. 2050 */ 2051 if (x86_platform.hyper.sev_es_hcall_finish && 2052 !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs)) 2053 return ES_VMM_ERROR; 2054 2055 return ES_OK; 2056 } 2057 2058 static enum es_result vc_handle_trap_ac(struct ghcb *ghcb, 2059 struct es_em_ctxt *ctxt) 2060 { 2061 /* 2062 * Calling exc_alignment_check() directly does not work, because it 2063 * enables IRQs and the GHCB is active. Forward the exception and call 2064 * it later from vc_forward_exception(). 2065 */ 2066 ctxt->fi.vector = X86_TRAP_AC; 2067 ctxt->fi.error_code = 0; 2068 return ES_EXCEPTION; 2069 } 2070 2071 static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, 2072 struct ghcb *ghcb, 2073 unsigned long exit_code) 2074 { 2075 enum es_result result = vc_check_opcode_bytes(ctxt, exit_code); 2076 2077 if (result != ES_OK) 2078 return result; 2079 2080 switch (exit_code) { 2081 case SVM_EXIT_READ_DR7: 2082 result = vc_handle_dr7_read(ghcb, ctxt); 2083 break; 2084 case SVM_EXIT_WRITE_DR7: 2085 result = vc_handle_dr7_write(ghcb, ctxt); 2086 break; 2087 case SVM_EXIT_EXCP_BASE + X86_TRAP_AC: 2088 result = vc_handle_trap_ac(ghcb, ctxt); 2089 break; 2090 case SVM_EXIT_RDTSC: 2091 case SVM_EXIT_RDTSCP: 2092 result = vc_handle_rdtsc(ghcb, ctxt, exit_code); 2093 break; 2094 case SVM_EXIT_RDPMC: 2095 result = vc_handle_rdpmc(ghcb, ctxt); 2096 break; 2097 case SVM_EXIT_INVD: 2098 pr_err_ratelimited("#VC exception for INVD??? Seriously???\n"); 2099 result = ES_UNSUPPORTED; 2100 break; 2101 case SVM_EXIT_CPUID: 2102 result = vc_handle_cpuid(ghcb, ctxt); 2103 break; 2104 case SVM_EXIT_IOIO: 2105 result = vc_handle_ioio(ghcb, ctxt); 2106 break; 2107 case SVM_EXIT_MSR: 2108 result = vc_handle_msr(ghcb, ctxt); 2109 break; 2110 case SVM_EXIT_VMMCALL: 2111 result = vc_handle_vmmcall(ghcb, ctxt); 2112 break; 2113 case SVM_EXIT_WBINVD: 2114 result = vc_handle_wbinvd(ghcb, ctxt); 2115 break; 2116 case SVM_EXIT_MONITOR: 2117 result = vc_handle_monitor(ghcb, ctxt); 2118 break; 2119 case SVM_EXIT_MWAIT: 2120 result = vc_handle_mwait(ghcb, ctxt); 2121 break; 2122 case SVM_EXIT_NPF: 2123 result = vc_handle_mmio(ghcb, ctxt); 2124 break; 2125 default: 2126 /* 2127 * Unexpected #VC exception 2128 */ 2129 result = ES_UNSUPPORTED; 2130 } 2131 2132 return result; 2133 } 2134 2135 static __always_inline bool is_vc2_stack(unsigned long sp) 2136 { 2137 return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); 2138 } 2139 2140 static __always_inline bool vc_from_invalid_context(struct pt_regs *regs) 2141 { 2142 unsigned long sp, prev_sp; 2143 2144 sp = (unsigned long)regs; 2145 prev_sp = regs->sp; 2146 2147 /* 2148 * If the code was already executing on the VC2 stack when the #VC 2149 * happened, let it proceed to the normal handling routine. This way the 2150 * code executing on the VC2 stack can cause #VC exceptions to get handled. 
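 *
 * In practice the code running on the VC2 stack is the invalid-context
 * path in the #VC handler below, which ends in panic(); printing the
 * panic message can itself raise further #VC exceptions (console MMIO or
 * port I/O, for example) that still have to be emulated.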
2151 */ 2152 return is_vc2_stack(sp) && !is_vc2_stack(prev_sp); 2153 } 2154 2155 static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) 2156 { 2157 struct ghcb_state state; 2158 struct es_em_ctxt ctxt; 2159 enum es_result result; 2160 struct ghcb *ghcb; 2161 bool ret = true; 2162 2163 ghcb = __sev_get_ghcb(&state); 2164 2165 vc_ghcb_invalidate(ghcb); 2166 result = vc_init_em_ctxt(&ctxt, regs, error_code); 2167 2168 if (result == ES_OK) 2169 result = vc_handle_exitcode(&ctxt, ghcb, error_code); 2170 2171 __sev_put_ghcb(&state); 2172 2173 /* Done - now check the result */ 2174 switch (result) { 2175 case ES_OK: 2176 vc_finish_insn(&ctxt); 2177 break; 2178 case ES_UNSUPPORTED: 2179 pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n", 2180 error_code, regs->ip); 2181 ret = false; 2182 break; 2183 case ES_VMM_ERROR: 2184 pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", 2185 error_code, regs->ip); 2186 ret = false; 2187 break; 2188 case ES_DECODE_FAILED: 2189 pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", 2190 error_code, regs->ip); 2191 ret = false; 2192 break; 2193 case ES_EXCEPTION: 2194 vc_forward_exception(&ctxt); 2195 break; 2196 case ES_RETRY: 2197 /* Nothing to do */ 2198 break; 2199 default: 2200 pr_emerg("Unknown result in %s():%d\n", __func__, result); 2201 /* 2202 * Emulating the instruction which caused the #VC exception 2203 * failed - can't continue so print debug information 2204 */ 2205 BUG(); 2206 } 2207 2208 return ret; 2209 } 2210 2211 static __always_inline bool vc_is_db(unsigned long error_code) 2212 { 2213 return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB; 2214 } 2215 2216 /* 2217 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode 2218 * and will panic when an error happens. 2219 */ 2220 DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) 2221 { 2222 irqentry_state_t irq_state; 2223 2224 /* 2225 * With the current implementation it is always possible to switch to a 2226 * safe stack because #VC exceptions only happen at known places, like 2227 * intercepted instructions or accesses to MMIO areas/IO ports. They can 2228 * also happen with code instrumentation when the hypervisor intercepts 2229 * #DB, but the critical paths are forbidden to be instrumented, so #DB 2230 * exceptions currently also only happen in safe places. 2231 * 2232 * But keep this here in case the noinstr annotations are violated due 2233 * to bug elsewhere. 2234 */ 2235 if (unlikely(vc_from_invalid_context(regs))) { 2236 instrumentation_begin(); 2237 panic("Can't handle #VC exception from unsupported context\n"); 2238 instrumentation_end(); 2239 } 2240 2241 /* 2242 * Handle #DB before calling into !noinstr code to avoid recursive #DB. 2243 */ 2244 if (vc_is_db(error_code)) { 2245 exc_debug(regs); 2246 return; 2247 } 2248 2249 irq_state = irqentry_nmi_enter(regs); 2250 2251 instrumentation_begin(); 2252 2253 if (!vc_raw_handle_exception(regs, error_code)) { 2254 /* Show some debug info */ 2255 show_regs(regs); 2256 2257 /* Ask hypervisor to sev_es_terminate */ 2258 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 2259 2260 /* If that fails and we get here - just panic */ 2261 panic("Returned from Terminate-Request to Hypervisor\n"); 2262 } 2263 2264 instrumentation_end(); 2265 irqentry_nmi_exit(regs, irq_state); 2266 } 2267 2268 /* 2269 * Runtime #VC exception handler when raised from user mode. 
Runs in IRQ mode 2270 * and will kill the current task with SIGBUS when an error happens. 2271 */ 2272 DEFINE_IDTENTRY_VC_USER(exc_vmm_communication) 2273 { 2274 /* 2275 * Handle #DB before calling into !noinstr code to avoid recursive #DB. 2276 */ 2277 if (vc_is_db(error_code)) { 2278 noist_exc_debug(regs); 2279 return; 2280 } 2281 2282 irqentry_enter_from_user_mode(regs); 2283 instrumentation_begin(); 2284 2285 if (!vc_raw_handle_exception(regs, error_code)) { 2286 /* 2287 * Do not kill the machine if user-space triggered the 2288 * exception. Send SIGBUS instead and let user-space deal with 2289 * it. 2290 */ 2291 force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); 2292 } 2293 2294 instrumentation_end(); 2295 irqentry_exit_to_user_mode(regs); 2296 } 2297 2298 bool __init handle_vc_boot_ghcb(struct pt_regs *regs) 2299 { 2300 unsigned long exit_code = regs->orig_ax; 2301 struct es_em_ctxt ctxt; 2302 enum es_result result; 2303 2304 vc_ghcb_invalidate(boot_ghcb); 2305 2306 result = vc_init_em_ctxt(&ctxt, regs, exit_code); 2307 if (result == ES_OK) 2308 result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code); 2309 2310 /* Done - now check the result */ 2311 switch (result) { 2312 case ES_OK: 2313 vc_finish_insn(&ctxt); 2314 break; 2315 case ES_UNSUPPORTED: 2316 early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", 2317 exit_code, regs->ip); 2318 goto fail; 2319 case ES_VMM_ERROR: 2320 early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", 2321 exit_code, regs->ip); 2322 goto fail; 2323 case ES_DECODE_FAILED: 2324 early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", 2325 exit_code, regs->ip); 2326 goto fail; 2327 case ES_EXCEPTION: 2328 vc_early_forward_exception(&ctxt); 2329 break; 2330 case ES_RETRY: 2331 /* Nothing to do */ 2332 break; 2333 default: 2334 BUG(); 2335 } 2336 2337 return true; 2338 2339 fail: 2340 show_regs(regs); 2341 2342 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); 2343 } 2344 2345 /* 2346 * Initial set up of SNP relies on information provided by the 2347 * Confidential Computing blob, which can be passed to the kernel 2348 * in the following ways, depending on how it is booted: 2349 * 2350 * - when booted via the boot/decompress kernel: 2351 * - via boot_params 2352 * 2353 * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH): 2354 * - via a setup_data entry, as defined by the Linux Boot Protocol 2355 * 2356 * Scan for the blob in that order. 2357 */ 2358 static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) 2359 { 2360 struct cc_blob_sev_info *cc_info; 2361 2362 /* Boot kernel would have passed the CC blob via boot_params. */ 2363 if (bp->cc_blob_address) { 2364 cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address; 2365 goto found_cc_info; 2366 } 2367 2368 /* 2369 * If kernel was booted directly, without the use of the 2370 * boot/decompression kernel, the CC blob may have been passed via 2371 * setup_data instead. 2372 */ 2373 cc_info = find_cc_blob_setup_data(bp); 2374 if (!cc_info) 2375 return NULL; 2376 2377 found_cc_info: 2378 if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) 2379 snp_abort(); 2380 2381 return cc_info; 2382 } 2383 2384 static __head void svsm_setup(struct cc_blob_sev_info *cc_info) 2385 { 2386 struct svsm_call call = {}; 2387 int ret; 2388 u64 pa; 2389 2390 /* 2391 * Record the SVSM Calling Area address (CAA) if the guest is not 2392 * running at VMPL0. 
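 * (When the guest itself runs at VMPL0 there is no SVSM underneath it; in
 * that case svsm_setup_ca() is expected to report nothing to record and
 * svsm_setup() simply returns early.)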
The CA will be used to communicate with the 2393 * SVSM to perform the SVSM services. 2394 */ 2395 if (!svsm_setup_ca(cc_info)) 2396 return; 2397 2398 /* 2399 * It is very early in the boot and the kernel is running identity 2400 * mapped but without having adjusted the pagetables to where the 2401 * kernel was loaded (physbase), so the get the CA address using 2402 * RIP-relative addressing. 2403 */ 2404 pa = (u64)&RIP_REL_REF(boot_svsm_ca_page); 2405 2406 /* 2407 * Switch over to the boot SVSM CA while the current CA is still 2408 * addressable. There is no GHCB at this point so use the MSR protocol. 2409 * 2410 * SVSM_CORE_REMAP_CA call: 2411 * RAX = 0 (Protocol=0, CallID=0) 2412 * RCX = New CA GPA 2413 */ 2414 call.caa = svsm_get_caa(); 2415 call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); 2416 call.rcx = pa; 2417 ret = svsm_perform_call_protocol(&call); 2418 if (ret) 2419 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL); 2420 2421 RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa; 2422 RIP_REL_REF(boot_svsm_caa_pa) = pa; 2423 } 2424 2425 bool __head snp_init(struct boot_params *bp) 2426 { 2427 struct cc_blob_sev_info *cc_info; 2428 2429 if (!bp) 2430 return false; 2431 2432 cc_info = find_cc_blob(bp); 2433 if (!cc_info) 2434 return false; 2435 2436 if (cc_info->secrets_phys && cc_info->secrets_len == PAGE_SIZE) 2437 secrets_pa = cc_info->secrets_phys; 2438 else 2439 return false; 2440 2441 setup_cpuid_table(cc_info); 2442 2443 svsm_setup(cc_info); 2444 2445 /* 2446 * The CC blob will be used later to access the secrets page. Cache 2447 * it here like the boot kernel does. 2448 */ 2449 bp->cc_blob_address = (u32)(unsigned long)cc_info; 2450 2451 return true; 2452 } 2453 2454 void __head __noreturn snp_abort(void) 2455 { 2456 sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); 2457 } 2458 2459 /* 2460 * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are 2461 * enabled, as the alternative (fallback) logic for DMI probing in the legacy 2462 * ROM region can cause a crash since this region is not pre-validated. 2463 */ 2464 void __init snp_dmi_setup(void) 2465 { 2466 if (efi_enabled(EFI_CONFIG_TABLES)) 2467 dmi_setup(); 2468 } 2469 2470 static void dump_cpuid_table(void) 2471 { 2472 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); 2473 int i = 0; 2474 2475 pr_info("count=%d reserved=0x%x reserved2=0x%llx\n", 2476 cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2); 2477 2478 for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) { 2479 const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; 2480 2481 pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n", 2482 i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx, 2483 fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved); 2484 } 2485 } 2486 2487 /* 2488 * It is useful from an auditing/testing perspective to provide an easy way 2489 * for the guest owner to know that the CPUID table has been initialized as 2490 * expected, but that initialization happens too early in boot to print any 2491 * sort of indicator, and there's not really any other good place to do it, 2492 * so do it here. 2493 * 2494 * If running as an SNP guest, report the current VM privilege level (VMPL). 
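 *
 * A typical boot therefore logs something like (values are placeholders):
 *
 *   Using SNP CPUID table, <n> entries present.
 *   SNP running at VMPL<n>.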
2495 */ 2496 static int __init report_snp_info(void) 2497 { 2498 const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); 2499 2500 if (cpuid_table->count) { 2501 pr_info("Using SNP CPUID table, %d entries present.\n", 2502 cpuid_table->count); 2503 2504 if (sev_cfg.debug) 2505 dump_cpuid_table(); 2506 } 2507 2508 if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 2509 pr_info("SNP running at VMPL%u.\n", snp_vmpl); 2510 2511 return 0; 2512 } 2513 arch_initcall(report_snp_info); 2514 2515 static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input) 2516 { 2517 /* If (new) lengths have been returned, propagate them up */ 2518 if (call->rcx_out != call->rcx) 2519 input->manifest_buf.len = call->rcx_out; 2520 2521 if (call->rdx_out != call->rdx) 2522 input->certificates_buf.len = call->rdx_out; 2523 2524 if (call->r8_out != call->r8) 2525 input->report_buf.len = call->r8_out; 2526 } 2527 2528 int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, 2529 struct svsm_attest_call *input) 2530 { 2531 struct svsm_attest_call *ac; 2532 unsigned long flags; 2533 u64 attest_call_pa; 2534 int ret; 2535 2536 if (!snp_vmpl) 2537 return -EINVAL; 2538 2539 local_irq_save(flags); 2540 2541 call->caa = svsm_get_caa(); 2542 2543 ac = (struct svsm_attest_call *)call->caa->svsm_buffer; 2544 attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); 2545 2546 *ac = *input; 2547 2548 /* 2549 * Set input registers for the request and set RDX and R8 to known 2550 * values in order to detect length values being returned in them. 2551 */ 2552 call->rax = call_id; 2553 call->rcx = attest_call_pa; 2554 call->rdx = -1; 2555 call->r8 = -1; 2556 ret = svsm_perform_call_protocol(call); 2557 update_attest_input(call, input); 2558 2559 local_irq_restore(flags); 2560 2561 return ret; 2562 } 2563 EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req); 2564 2565 static int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input, 2566 struct snp_guest_request_ioctl *rio) 2567 { 2568 struct ghcb_state state; 2569 struct es_em_ctxt ctxt; 2570 unsigned long flags; 2571 struct ghcb *ghcb; 2572 int ret; 2573 2574 rio->exitinfo2 = SEV_RET_NO_FW_CALL; 2575 2576 /* 2577 * __sev_get_ghcb() needs to run with IRQs disabled because it is using 2578 * a per-CPU GHCB. 
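 * Keeping interrupts disabled for the whole sequence also prevents the task
 * from being preempted or migrated between getting and putting the GHCB, so
 * the code below follows the usual pattern:
 *
 *      local_irq_save(flags);
 *      ghcb = __sev_get_ghcb(&state);
 *      ... set up the GHCB and issue the VMGEXIT ...
 *      __sev_put_ghcb(&state);
 *      local_irq_restore(flags);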
2579 */ 2580 local_irq_save(flags); 2581 2582 ghcb = __sev_get_ghcb(&state); 2583 if (!ghcb) { 2584 ret = -EIO; 2585 goto e_restore_irq; 2586 } 2587 2588 vc_ghcb_invalidate(ghcb); 2589 2590 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { 2591 ghcb_set_rax(ghcb, input->data_gpa); 2592 ghcb_set_rbx(ghcb, input->data_npages); 2593 } 2594 2595 ret = sev_es_ghcb_hv_call(ghcb, &ctxt, req->exit_code, input->req_gpa, input->resp_gpa); 2596 if (ret) 2597 goto e_put; 2598 2599 rio->exitinfo2 = ghcb->save.sw_exit_info_2; 2600 switch (rio->exitinfo2) { 2601 case 0: 2602 break; 2603 2604 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY): 2605 ret = -EAGAIN; 2606 break; 2607 2608 case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN): 2609 /* Number of expected pages are returned in RBX */ 2610 if (req->exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { 2611 input->data_npages = ghcb_get_rbx(ghcb); 2612 ret = -ENOSPC; 2613 break; 2614 } 2615 fallthrough; 2616 default: 2617 ret = -EIO; 2618 break; 2619 } 2620 2621 e_put: 2622 __sev_put_ghcb(&state); 2623 e_restore_irq: 2624 local_irq_restore(flags); 2625 2626 return ret; 2627 } 2628 2629 static struct platform_device sev_guest_device = { 2630 .name = "sev-guest", 2631 .id = -1, 2632 }; 2633 2634 static int __init snp_init_platform_device(void) 2635 { 2636 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 2637 return -ENODEV; 2638 2639 if (platform_device_register(&sev_guest_device)) 2640 return -ENODEV; 2641 2642 pr_info("SNP guest platform device initialized.\n"); 2643 return 0; 2644 } 2645 device_initcall(snp_init_platform_device); 2646 2647 void sev_show_status(void) 2648 { 2649 int i; 2650 2651 pr_info("Status: "); 2652 for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) { 2653 if (sev_status & BIT_ULL(i)) { 2654 if (!sev_status_feat_names[i]) 2655 continue; 2656 2657 pr_cont("%s ", sev_status_feat_names[i]); 2658 } 2659 } 2660 pr_cont("\n"); 2661 } 2662 2663 void __init snp_update_svsm_ca(void) 2664 { 2665 if (!snp_vmpl) 2666 return; 2667 2668 /* Update the CAA to a proper kernel address */ 2669 boot_svsm_caa = &boot_svsm_ca_page; 2670 } 2671 2672 #ifdef CONFIG_SYSFS 2673 static ssize_t vmpl_show(struct kobject *kobj, 2674 struct kobj_attribute *attr, char *buf) 2675 { 2676 return sysfs_emit(buf, "%d\n", snp_vmpl); 2677 } 2678 2679 static struct kobj_attribute vmpl_attr = __ATTR_RO(vmpl); 2680 2681 static struct attribute *vmpl_attrs[] = { 2682 &vmpl_attr.attr, 2683 NULL 2684 }; 2685 2686 static struct attribute_group sev_attr_group = { 2687 .attrs = vmpl_attrs, 2688 }; 2689 2690 static int __init sev_sysfs_init(void) 2691 { 2692 struct kobject *sev_kobj; 2693 struct device *dev_root; 2694 int ret; 2695 2696 if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) 2697 return -ENODEV; 2698 2699 dev_root = bus_get_dev_root(&cpu_subsys); 2700 if (!dev_root) 2701 return -ENODEV; 2702 2703 sev_kobj = kobject_create_and_add("sev", &dev_root->kobj); 2704 put_device(dev_root); 2705 2706 if (!sev_kobj) 2707 return -ENOMEM; 2708 2709 ret = sysfs_create_group(sev_kobj, &sev_attr_group); 2710 if (ret) 2711 kobject_put(sev_kobj); 2712 2713 return ret; 2714 } 2715 arch_initcall(sev_sysfs_init); 2716 #endif // CONFIG_SYSFS 2717 2718 static void free_shared_pages(void *buf, size_t sz) 2719 { 2720 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; 2721 int ret; 2722 2723 if (!buf) 2724 return; 2725 2726 ret = set_memory_encrypted((unsigned long)buf, npages); 2727 if (ret) { 2728 WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); 2729 return; 2730 } 2731 2732 
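	/*
	 * The memory is private again at this point, so it is safe to hand it
	 * back to the page allocator. On failure above the pages are
	 * intentionally leaked instead, since freeing memory that is still
	 * shared with the hypervisor would expose whatever the allocator
	 * hands out there next.
	 */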
__free_pages(virt_to_page(buf), get_order(sz)); 2733 } 2734 2735 static void *alloc_shared_pages(size_t sz) 2736 { 2737 unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; 2738 struct page *page; 2739 int ret; 2740 2741 page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz)); 2742 if (!page) 2743 return NULL; 2744 2745 ret = set_memory_decrypted((unsigned long)page_address(page), npages); 2746 if (ret) { 2747 pr_err("failed to mark page shared, ret=%d\n", ret); 2748 __free_pages(page, get_order(sz)); 2749 return NULL; 2750 } 2751 2752 return page_address(page); 2753 } 2754 2755 static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno) 2756 { 2757 u8 *key = NULL; 2758 2759 switch (id) { 2760 case 0: 2761 *seqno = &secrets->os_area.msg_seqno_0; 2762 key = secrets->vmpck0; 2763 break; 2764 case 1: 2765 *seqno = &secrets->os_area.msg_seqno_1; 2766 key = secrets->vmpck1; 2767 break; 2768 case 2: 2769 *seqno = &secrets->os_area.msg_seqno_2; 2770 key = secrets->vmpck2; 2771 break; 2772 case 3: 2773 *seqno = &secrets->os_area.msg_seqno_3; 2774 key = secrets->vmpck3; 2775 break; 2776 default: 2777 break; 2778 } 2779 2780 return key; 2781 } 2782 2783 static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen) 2784 { 2785 struct aesgcm_ctx *ctx; 2786 2787 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 2788 if (!ctx) 2789 return NULL; 2790 2791 if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) { 2792 pr_err("Crypto context initialization failed\n"); 2793 kfree(ctx); 2794 return NULL; 2795 } 2796 2797 return ctx; 2798 } 2799 2800 int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) 2801 { 2802 /* Adjust the default VMPCK key based on the executing VMPL level */ 2803 if (vmpck_id == -1) 2804 vmpck_id = snp_vmpl; 2805 2806 mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno); 2807 if (!mdesc->vmpck) { 2808 pr_err("Invalid VMPCK%d communication key\n", vmpck_id); 2809 return -EINVAL; 2810 } 2811 2812 /* Verify that VMPCK is not zero. */ 2813 if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) { 2814 pr_err("Empty VMPCK%d communication key\n", vmpck_id); 2815 return -EINVAL; 2816 } 2817 2818 mdesc->vmpck_id = vmpck_id; 2819 2820 mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN); 2821 if (!mdesc->ctx) 2822 return -ENOMEM; 2823 2824 return 0; 2825 } 2826 EXPORT_SYMBOL_GPL(snp_msg_init); 2827 2828 struct snp_msg_desc *snp_msg_alloc(void) 2829 { 2830 struct snp_msg_desc *mdesc; 2831 void __iomem *mem; 2832 2833 BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE); 2834 2835 mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL); 2836 if (!mdesc) 2837 return ERR_PTR(-ENOMEM); 2838 2839 mem = ioremap_encrypted(secrets_pa, PAGE_SIZE); 2840 if (!mem) 2841 goto e_free_mdesc; 2842 2843 mdesc->secrets = (__force struct snp_secrets_page *)mem; 2844 2845 /* Allocate the shared page used for the request and response message. 
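 * The request and response pages are mapped decrypted (see
 * alloc_shared_pages()), which is what allows the hypervisor to read the
 * AES-GCM encrypted messages placed in them.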
*/ 2846 mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg)); 2847 if (!mdesc->request) 2848 goto e_unmap; 2849 2850 mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg)); 2851 if (!mdesc->response) 2852 goto e_free_request; 2853 2854 return mdesc; 2855 2856 e_free_request: 2857 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); 2858 e_unmap: 2859 iounmap(mem); 2860 e_free_mdesc: 2861 kfree(mdesc); 2862 2863 return ERR_PTR(-ENOMEM); 2864 } 2865 EXPORT_SYMBOL_GPL(snp_msg_alloc); 2866 2867 void snp_msg_free(struct snp_msg_desc *mdesc) 2868 { 2869 if (!mdesc) 2870 return; 2871 2872 kfree(mdesc->ctx); 2873 free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); 2874 free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); 2875 iounmap((__force void __iomem *)mdesc->secrets); 2876 2877 memset(mdesc, 0, sizeof(*mdesc)); 2878 kfree(mdesc); 2879 } 2880 EXPORT_SYMBOL_GPL(snp_msg_free); 2881 2882 /* Mutex to serialize the shared buffer access and command handling. */ 2883 static DEFINE_MUTEX(snp_cmd_mutex); 2884 2885 /* 2886 * If an error is received from the host or AMD Secure Processor (ASP) there 2887 * are two options. Either retry the exact same encrypted request or discontinue 2888 * using the VMPCK. 2889 * 2890 * This is because in the current encryption scheme GHCB v2 uses AES-GCM to 2891 * encrypt the requests. The IV for this scheme is the sequence number. GCM 2892 * cannot tolerate IV reuse. 2893 * 2894 * The ASP FW v1.51 only increments the sequence numbers on a successful 2895 * guest<->ASP back and forth and only accepts messages at its exact sequence 2896 * number. 2897 * 2898 * So if the sequence number were to be reused the encryption scheme is 2899 * vulnerable. If the sequence number were incremented for a fresh IV the ASP 2900 * will reject the request. 2901 */ 2902 static void snp_disable_vmpck(struct snp_msg_desc *mdesc) 2903 { 2904 pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n", 2905 mdesc->vmpck_id); 2906 memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN); 2907 mdesc->vmpck = NULL; 2908 } 2909 2910 static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc) 2911 { 2912 u64 count; 2913 2914 lockdep_assert_held(&snp_cmd_mutex); 2915 2916 /* Read the current message sequence counter from secrets pages */ 2917 count = *mdesc->os_area_msg_seqno; 2918 2919 return count + 1; 2920 } 2921 2922 /* Return a non-zero on success */ 2923 static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc) 2924 { 2925 u64 count = __snp_get_msg_seqno(mdesc); 2926 2927 /* 2928 * The message sequence counter for the SNP guest request is a 64-bit 2929 * value but the version 2 of GHCB specification defines a 32-bit storage 2930 * for it. If the counter exceeds the 32-bit value then return zero. 2931 * The caller should check the return value, but if the caller happens to 2932 * not check the value and use it, then the firmware treats zero as an 2933 * invalid number and will fail the message request. 2934 */ 2935 if (count >= UINT_MAX) { 2936 pr_err("request message sequence counter overflow\n"); 2937 return 0; 2938 } 2939 2940 return count; 2941 } 2942 2943 static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc) 2944 { 2945 /* 2946 * The counter is also incremented by the PSP, so increment it by 2 2947 * and save in secrets page. 
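 * With this scheme the guest always sends odd sequence numbers and the PSP
 * answers with the following even number (request 1 -> response 2,
 * request 3 -> response 4, ...), so the value kept in the secrets page is
 * always the sequence number of the last response.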
2948 */ 2949 *mdesc->os_area_msg_seqno += 2; 2950 } 2951 2952 static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req) 2953 { 2954 struct snp_guest_msg *resp_msg = &mdesc->secret_response; 2955 struct snp_guest_msg *req_msg = &mdesc->secret_request; 2956 struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr; 2957 struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr; 2958 struct aesgcm_ctx *ctx = mdesc->ctx; 2959 u8 iv[GCM_AES_IV_SIZE] = {}; 2960 2961 pr_debug("response [seqno %lld type %d version %d sz %d]\n", 2962 resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version, 2963 resp_msg_hdr->msg_sz); 2964 2965 /* Copy response from shared memory to encrypted memory. */ 2966 memcpy(resp_msg, mdesc->response, sizeof(*resp_msg)); 2967 2968 /* Verify that the sequence counter is incremented by 1 */ 2969 if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1))) 2970 return -EBADMSG; 2971 2972 /* Verify response message type and version number. */ 2973 if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) || 2974 resp_msg_hdr->msg_version != req_msg_hdr->msg_version) 2975 return -EBADMSG; 2976 2977 /* 2978 * If the message size is greater than our buffer length then return 2979 * an error. 2980 */ 2981 if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz)) 2982 return -EBADMSG; 2983 2984 /* Decrypt the payload */ 2985 memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno))); 2986 if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz, 2987 &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag)) 2988 return -EBADMSG; 2989 2990 return 0; 2991 } 2992 2993 static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req) 2994 { 2995 struct snp_guest_msg *msg = &mdesc->secret_request; 2996 struct snp_guest_msg_hdr *hdr = &msg->hdr; 2997 struct aesgcm_ctx *ctx = mdesc->ctx; 2998 u8 iv[GCM_AES_IV_SIZE] = {}; 2999 3000 memset(msg, 0, sizeof(*msg)); 3001 3002 hdr->algo = SNP_AEAD_AES_256_GCM; 3003 hdr->hdr_version = MSG_HDR_VER; 3004 hdr->hdr_sz = sizeof(*hdr); 3005 hdr->msg_type = req->msg_type; 3006 hdr->msg_version = req->msg_version; 3007 hdr->msg_seqno = seqno; 3008 hdr->msg_vmpck = req->vmpck_id; 3009 hdr->msg_sz = req->req_sz; 3010 3011 /* Verify the sequence number is non-zero */ 3012 if (!hdr->msg_seqno) 3013 return -ENOSR; 3014 3015 pr_debug("request [seqno %lld type %d version %d sz %d]\n", 3016 hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz); 3017 3018 if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload))) 3019 return -EBADMSG; 3020 3021 memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno))); 3022 aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo, 3023 AAD_LEN, iv, hdr->authtag); 3024 3025 return 0; 3026 } 3027 3028 static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, 3029 struct snp_guest_request_ioctl *rio) 3030 { 3031 unsigned long req_start = jiffies; 3032 unsigned int override_npages = 0; 3033 u64 override_err = 0; 3034 int rc; 3035 3036 retry_request: 3037 /* 3038 * Call firmware to process the request. In this function the encrypted 3039 * message enters shared memory with the host. So after this call the 3040 * sequence number must be incremented or the VMPCK must be deleted to 3041 * prevent reuse of the IV. 
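 *
 * Concretely: after the VMGEXIT below the code either retries with the very
 * same encrypted message (throttling, or an extended request whose
 * certificate buffer was too small) or falls through to snp_inc_msg_seqno();
 * wiping the VMPCK on a failed exchange is left to the caller,
 * snp_send_guest_request().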
3042 */ 3043 rc = snp_issue_guest_request(req, &req->input, rio); 3044 switch (rc) { 3045 case -ENOSPC: 3046 /* 3047 * If the extended guest request fails due to having too 3048 * small of a certificate data buffer, retry the same 3049 * guest request without the extended data request in 3050 * order to increment the sequence number and thus avoid 3051 * IV reuse. 3052 */ 3053 override_npages = req->input.data_npages; 3054 req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; 3055 3056 /* 3057 * Override the error to inform callers the given extended 3058 * request buffer size was too small and give the caller the 3059 * required buffer size. 3060 */ 3061 override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN); 3062 3063 /* 3064 * If this call to the firmware succeeds, the sequence number can 3065 * be incremented allowing for continued use of the VMPCK. If 3066 * there is an error reflected in the return value, this value 3067 * is checked further down and the result will be the deletion 3068 * of the VMPCK and the error code being propagated back to the 3069 * user as an ioctl() return code. 3070 */ 3071 goto retry_request; 3072 3073 /* 3074 * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been 3075 * throttled. Retry in the driver to avoid returning and reusing the 3076 * message sequence number on a different message. 3077 */ 3078 case -EAGAIN: 3079 if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) { 3080 rc = -ETIMEDOUT; 3081 break; 3082 } 3083 schedule_timeout_killable(SNP_REQ_RETRY_DELAY); 3084 goto retry_request; 3085 } 3086 3087 /* 3088 * Increment the message sequence number. There is no harm in doing 3089 * this now because decryption uses the value stored in the response 3090 * structure and any failure will wipe the VMPCK, preventing further 3091 * use anyway. 3092 */ 3093 snp_inc_msg_seqno(mdesc); 3094 3095 if (override_err) { 3096 rio->exitinfo2 = override_err; 3097 3098 /* 3099 * If an extended guest request was issued and the supplied certificate 3100 * buffer was not large enough, a standard guest request was issued to 3101 * prevent IV reuse. If the standard request was successful, return -EIO 3102 * back to the caller as would have originally been returned. 3103 */ 3104 if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) 3105 rc = -EIO; 3106 } 3107 3108 if (override_npages) 3109 req->input.data_npages = override_npages; 3110 3111 return rc; 3112 } 3113 3114 int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, 3115 struct snp_guest_request_ioctl *rio) 3116 { 3117 u64 seqno; 3118 int rc; 3119 3120 guard(mutex)(&snp_cmd_mutex); 3121 3122 /* Check if the VMPCK is not empty */ 3123 if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) { 3124 pr_err_ratelimited("VMPCK is disabled\n"); 3125 return -ENOTTY; 3126 } 3127 3128 /* Get message sequence and verify that its a non-zero */ 3129 seqno = snp_get_msg_seqno(mdesc); 3130 if (!seqno) 3131 return -EIO; 3132 3133 /* Clear shared memory's response for the host to populate. */ 3134 memset(mdesc->response, 0, sizeof(struct snp_guest_msg)); 3135 3136 /* Encrypt the userspace provided payload in mdesc->secret_request. */ 3137 rc = enc_payload(mdesc, seqno, req); 3138 if (rc) 3139 return rc; 3140 3141 /* 3142 * Write the fully encrypted request to the shared unencrypted 3143 * request page. 
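 * This copy is what actually exposes the message to the host; everything up
 * to this point only touched the encrypted secret_request copy in mdesc.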
3144 */ 3145 memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request)); 3146 3147 /* Initialize the input address for guest request */ 3148 req->input.req_gpa = __pa(mdesc->request); 3149 req->input.resp_gpa = __pa(mdesc->response); 3150 req->input.data_gpa = req->certs_data ? __pa(req->certs_data) : 0; 3151 3152 rc = __handle_guest_request(mdesc, req, rio); 3153 if (rc) { 3154 if (rc == -EIO && 3155 rio->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) 3156 return rc; 3157 3158 pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n", 3159 rc, rio->exitinfo2); 3160 3161 snp_disable_vmpck(mdesc); 3162 return rc; 3163 } 3164 3165 rc = verify_and_dec_payload(mdesc, req); 3166 if (rc) { 3167 pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc); 3168 snp_disable_vmpck(mdesc); 3169 return rc; 3170 } 3171 3172 return 0; 3173 } 3174 EXPORT_SYMBOL_GPL(snp_send_guest_request); 3175 3176 static int __init snp_get_tsc_info(void) 3177 { 3178 struct snp_guest_request_ioctl *rio; 3179 struct snp_tsc_info_resp *tsc_resp; 3180 struct snp_tsc_info_req *tsc_req; 3181 struct snp_msg_desc *mdesc; 3182 struct snp_guest_req *req; 3183 int rc = -ENOMEM; 3184 3185 tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL); 3186 if (!tsc_req) 3187 return rc; 3188 3189 /* 3190 * The intermediate response buffer is used while decrypting the 3191 * response payload. Make sure that it has enough space to cover 3192 * the authtag. 3193 */ 3194 tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL); 3195 if (!tsc_resp) 3196 goto e_free_tsc_req; 3197 3198 req = kzalloc(sizeof(*req), GFP_KERNEL); 3199 if (!req) 3200 goto e_free_tsc_resp; 3201 3202 rio = kzalloc(sizeof(*rio), GFP_KERNEL); 3203 if (!rio) 3204 goto e_free_req; 3205 3206 mdesc = snp_msg_alloc(); 3207 if (IS_ERR_OR_NULL(mdesc)) 3208 goto e_free_rio; 3209 3210 rc = snp_msg_init(mdesc, snp_vmpl); 3211 if (rc) 3212 goto e_free_mdesc; 3213 3214 req->msg_version = MSG_HDR_VER; 3215 req->msg_type = SNP_MSG_TSC_INFO_REQ; 3216 req->vmpck_id = snp_vmpl; 3217 req->req_buf = tsc_req; 3218 req->req_sz = sizeof(*tsc_req); 3219 req->resp_buf = (void *)tsc_resp; 3220 req->resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN; 3221 req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; 3222 3223 rc = snp_send_guest_request(mdesc, req, rio); 3224 if (rc) 3225 goto e_request; 3226 3227 pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n", 3228 __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset, 3229 tsc_resp->tsc_factor); 3230 3231 if (!tsc_resp->status) { 3232 snp_tsc_scale = tsc_resp->tsc_scale; 3233 snp_tsc_offset = tsc_resp->tsc_offset; 3234 } else { 3235 pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status); 3236 rc = -EIO; 3237 } 3238 3239 e_request: 3240 /* The response buffer contains sensitive data, explicitly clear it. 
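 * memzero_explicit() is used instead of plain memset() so the compiler
 * cannot optimize the clearing away before the buffer is freed.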
*/ 3241 memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN); 3242 e_free_mdesc: 3243 snp_msg_free(mdesc); 3244 e_free_rio: 3245 kfree(rio); 3246 e_free_req: 3247 kfree(req); 3248 e_free_tsc_resp: 3249 kfree(tsc_resp); 3250 e_free_tsc_req: 3251 kfree(tsc_req); 3252 3253 return rc; 3254 } 3255 3256 void __init snp_secure_tsc_prepare(void) 3257 { 3258 if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) 3259 return; 3260 3261 if (snp_get_tsc_info()) { 3262 pr_alert("Unable to retrieve Secure TSC info from ASP\n"); 3263 sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); 3264 } 3265 3266 pr_debug("SecureTSC enabled"); 3267 } 3268 3269 static unsigned long securetsc_get_tsc_khz(void) 3270 { 3271 return snp_tsc_freq_khz; 3272 } 3273 3274 void __init snp_secure_tsc_init(void) 3275 { 3276 unsigned long long tsc_freq_mhz; 3277 3278 if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) 3279 return; 3280 3281 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); 3282 rdmsrq(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); 3283 snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); 3284 3285 x86_platform.calibrate_cpu = securetsc_get_tsc_khz; 3286 x86_platform.calibrate_tsc = securetsc_get_tsc_khz; 3287 } 3288
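
/*
 * Example of the Secure TSC wiring above, with made-up numbers: a guest whose
 * MSR_AMD64_GUEST_TSC_FREQ reads 2500 (MHz) ends up with
 * snp_tsc_freq_khz = 2500000, and securetsc_get_tsc_khz() returns that value
 * from both calibration hooks, so the TSC frequency is taken as known instead
 * of being calibrated against another clock at runtime.
 */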