// SPDX-License-Identifier: GPL-2.0
/*
 * Hyper-V Isolation VM interface with paravisor and hypervisor
 *
 * Author:
 *  Tianyu Lan <Tianyu.Lan@microsoft.com>
 */

#include <linux/bitfield.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/export.h>
#include <asm/svm.h>
#include <asm/sev.h>
#include <asm/io.h>
#include <asm/coco.h>
#include <asm/mem_encrypt.h>
#include <asm/set_memory.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/mtrr.h>
#include <asm/io_apic.h>
#include <asm/realmode.h>
#include <asm/e820/api.h>
#include <asm/desc.h>
#include <asm/msr.h>
#include <uapi/asm/vmx.h>

#ifdef CONFIG_AMD_MEM_ENCRYPT

#define GHCB_USAGE_HYPERV_CALL	1

union hv_ghcb {
	struct ghcb ghcb;
	struct {
		u64 hypercalldata[509];
		u64 outputgpa;
		union {
			union {
				struct {
					u32 callcode		: 16;
					u32 isfast		: 1;
					u32 reserved1		: 14;
					u32 isnested		: 1;
					u32 countofelements	: 12;
					u32 reserved2		: 4;
					u32 repstartindex	: 12;
					u32 reserved3		: 4;
				};
				u64 asuint64;
			} hypercallinput;
			union {
				struct {
					u16 callstatus;
					u16 reserved1;
					u32 elementsprocessed	: 12;
					u32 reserved2		: 20;
				};
				u64 asuint64;
			} hypercalloutput;
		};
		u64 reserved2;
	} hypercall;
} __packed __aligned(HV_HYP_PAGE_SIZE);

/* Only used in an SNP VM with the paravisor */
static u16 hv_ghcb_version __ro_after_init;

/* Functions only used in an SNP VM with the paravisor go here. */
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
{
	union hv_ghcb *hv_ghcb;
	void **ghcb_base;
	unsigned long flags;
	u64 status;

	if (!hv_ghcb_pg)
		return -EFAULT;

	WARN_ON(in_nmi());

	local_irq_save(flags);
	ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg);
	hv_ghcb = (union hv_ghcb *)*ghcb_base;
	if (!hv_ghcb) {
		local_irq_restore(flags);
		return -EFAULT;
	}

	hv_ghcb->ghcb.protocol_version = GHCB_PROTOCOL_MAX;
	hv_ghcb->ghcb.ghcb_usage = GHCB_USAGE_HYPERV_CALL;

	hv_ghcb->hypercall.outputgpa = (u64)output;
	hv_ghcb->hypercall.hypercallinput.asuint64 = 0;
	hv_ghcb->hypercall.hypercallinput.callcode = control;

	if (input_size)
		memcpy(hv_ghcb->hypercall.hypercalldata, input, input_size);

	VMGEXIT();

	hv_ghcb->ghcb.ghcb_usage = 0xffffffff;
	memset(hv_ghcb->ghcb.save.valid_bitmap, 0,
	       sizeof(hv_ghcb->ghcb.save.valid_bitmap));

	status = hv_ghcb->hypercall.hypercalloutput.callstatus;

	local_irq_restore(flags);

	return status;
}

static inline u64 rd_ghcb_msr(void)
{
	return native_rdmsrq(MSR_AMD64_SEV_ES_GHCB);
}

static inline void wr_ghcb_msr(u64 val)
{
	native_wrmsrq(MSR_AMD64_SEV_ES_GHCB, val);
}

static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code,
				      u64 exit_info_1, u64 exit_info_2)
{
	/* Fill in protocol and format specifiers */
	ghcb->protocol_version = hv_ghcb_version;
	ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, exit_code);
	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

	VMGEXIT();

	if (ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0))
		return ES_VMM_ERROR;
	else
		return ES_OK;
}

void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason)
{
	u64 val = GHCB_MSR_TERM_REQ;

	/* Tell the hypervisor what went wrong. */
	val |= GHCB_SEV_TERM_REASON(set, reason);

	/* Request Guest Termination from Hypervisor */
	wr_ghcb_msr(val);
	VMGEXIT();

	while (true)
		asm volatile("hlt\n" : : : "memory");
}

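/*
 * Negotiate the GHCB protocol version with the hypervisor via the GHCB MSR
 * protocol, and record the highest version supported by both sides in
 * hv_ghcb_version.
 */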
bool hv_ghcb_negotiate_protocol(void)
{
	u64 ghcb_gpa;
	u64 val;

	/* Save ghcb page gpa. */
	ghcb_gpa = rd_ghcb_msr();

	/* Do the GHCB protocol version negotiation */
	wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
	VMGEXIT();
	val = rd_ghcb_msr();

	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
		return false;

	if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
	    GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
		return false;

	hv_ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val),
				GHCB_PROTOCOL_MAX);

	/* Write ghcb page back after negotiating protocol. */
	wr_ghcb_msr(ghcb_gpa);
	VMGEXIT();

	return true;
}

static void hv_ghcb_msr_write(u64 msr, u64 value)
{
	union hv_ghcb *hv_ghcb;
	void **ghcb_base;
	unsigned long flags;

	if (!hv_ghcb_pg)
		return;

	WARN_ON(in_nmi());

	local_irq_save(flags);
	ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg);
	hv_ghcb = (union hv_ghcb *)*ghcb_base;
	if (!hv_ghcb) {
		local_irq_restore(flags);
		return;
	}

	ghcb_set_rcx(&hv_ghcb->ghcb, msr);
	ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
	ghcb_set_rdx(&hv_ghcb->ghcb, upper_32_bits(value));

	if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 1, 0))
		pr_warn("Fail to write msr via ghcb %llx.\n", msr);

	local_irq_restore(flags);
}

static void hv_ghcb_msr_read(u64 msr, u64 *value)
{
	union hv_ghcb *hv_ghcb;
	void **ghcb_base;
	unsigned long flags;

	/* Check size of union hv_ghcb here. */
	BUILD_BUG_ON(sizeof(union hv_ghcb) != HV_HYP_PAGE_SIZE);

	if (!hv_ghcb_pg)
		return;

	WARN_ON(in_nmi());

	local_irq_save(flags);
	ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg);
	hv_ghcb = (union hv_ghcb *)*ghcb_base;
	if (!hv_ghcb) {
		local_irq_restore(flags);
		return;
	}

	ghcb_set_rcx(&hv_ghcb->ghcb, msr);
	if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 0, 0))
		pr_warn("Fail to read msr via ghcb %llx.\n", msr);
	else
		*value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax)
			| ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32);
	local_irq_restore(flags);
}

/* Only used in a fully enlightened SNP VM, i.e. without the paravisor */
static u8 ap_start_input_arg[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
static u8 ap_start_stack[PAGE_SIZE] __aligned(PAGE_SIZE);
static DEFINE_PER_CPU(struct sev_es_save_area *, hv_sev_vmsa);

/* Functions only used in an SNP VM without the paravisor go here. */

#define hv_populate_vmcb_seg(seg, gdtr_base)			\
do {								\
	if (seg.selector) {					\
		seg.base = 0;					\
		seg.limit = HV_AP_SEGMENT_LIMIT;		\
		seg.attrib = *(u16 *)(gdtr_base + seg.selector + 5); \
		seg.attrib = (seg.attrib & 0xFF) | ((seg.attrib >> 4) & 0xF00); \
	}							\
} while (0)

static int snp_set_vmsa(void *va, bool vmsa)
{
	u64 attrs;

	/*
	 * Running at VMPL0 allows the kernel to change the VMSA bit for a page
	 * using the RMPADJUST instruction. However, for the instruction to
	 * succeed it must target the permissions of a lesser privileged
	 * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST
	 * instruction in the AMD64 APM Volume 3).
	 */
	attrs = 1;
	if (vmsa)
		attrs |= RMPADJUST_VMSA_PAGE_BIT;

	return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
}

static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
{
	int err;

	err = snp_set_vmsa(vmsa, false);
	if (err)
		pr_err("clear VMSA page failed (%u), leaking page\n", err);
	else
		free_page((unsigned long)vmsa);
}

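/*
 * Boot an AP in a fully enlightened SNP VM: build a VMSA from the boot CPU's
 * current register state, mark the page as a VMSA with RMPADJUST, and ask
 * Hyper-V to start the VP with it via HVCALL_START_VP.
 */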
int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu)
{
	struct sev_es_save_area *vmsa = (struct sev_es_save_area *)
		__get_free_page(GFP_KERNEL | __GFP_ZERO);
	struct sev_es_save_area *cur_vmsa;
	struct desc_ptr gdtr;
	u64 ret, retry = 5;
	struct hv_enable_vp_vtl *start_vp_input;
	unsigned long flags;
	int vp_index;

	if (!vmsa)
		return -ENOMEM;

	/* Find the Hyper-V VP index, which may not be the same as the APIC ID */
	vp_index = hv_apicid_to_vp_index(apic_id);
	if (vp_index < 0 || vp_index > ms_hyperv.max_vp_index) {
		free_page((unsigned long)vmsa);
		return -EINVAL;
	}

	native_store_gdt(&gdtr);

	vmsa->gdtr.base = gdtr.address;
	vmsa->gdtr.limit = gdtr.size;

	asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector));
	hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base);

	asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector));
	hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base);

	asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector));
	hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base);

	asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector));
	hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base);

	vmsa->efer = native_read_msr(MSR_EFER);

	vmsa->cr4 = native_read_cr4();
	vmsa->cr3 = __native_read_cr3();
	vmsa->cr0 = native_read_cr0();

	vmsa->xcr0 = 1;
	vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT;
	vmsa->rip = (u64)secondary_startup_64_no_verify;
	vmsa->rsp = (u64)&ap_start_stack[PAGE_SIZE];

	/*
	 * Set the SNP-specific fields for this VMSA:
	 *   VMPL level
	 *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
	 */
	vmsa->vmpl = 0;
	vmsa->sev_features = sev_status >> 2;

	ret = snp_set_vmsa(vmsa, true);
	if (ret) {
		pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret);
		free_page((u64)vmsa);
		return ret;
	}

	local_irq_save(flags);
	start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg;
	memset(start_vp_input, 0, sizeof(*start_vp_input));
	start_vp_input->partition_id = -1;
	start_vp_input->vp_index = vp_index;
	start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl;
	*(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1;

	do {
		ret = hv_do_hypercall(HVCALL_START_VP,
				      start_vp_input, NULL);
	} while (hv_result(ret) == HV_STATUS_TIME_OUT && retry--);

	local_irq_restore(flags);

	if (!hv_result_success(ret)) {
		pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret);
		snp_cleanup_vmsa(vmsa);
		vmsa = NULL;
	}

	cur_vmsa = per_cpu(hv_sev_vmsa, cpu);
	/* Free up any previous VMSA page */
	if (cur_vmsa)
		snp_cleanup_vmsa(cur_vmsa);

	/* Record the current VMSA page */
	per_cpu(hv_sev_vmsa, cpu) = vmsa;

	return ret;
}

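/*
 * Hypercalls from a fully enlightened SNP VM go straight to the hypervisor
 * via VMMCALL: the control word is passed in RCX, the two parameters in RDX
 * and R8, and the status is returned in RAX.
 */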
u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2)
{
	u64 hv_status;

	register u64 __r8 asm("r8") = param2;
	asm volatile("vmmcall"
		     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
		       "+c" (control), "+d" (param1), "+r" (__r8)
		     : : "cc", "memory", "r9", "r10", "r11");

	return hv_status;
}

#else
static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
u64 hv_snp_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; }
#endif /* CONFIG_AMD_MEM_ENCRYPT */

#ifdef CONFIG_INTEL_TDX_GUEST
static void hv_tdx_msr_write(u64 msr, u64 val)
{
	struct tdx_module_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = EXIT_REASON_MSR_WRITE,
		.r12 = msr,
		.r13 = val,
	};

	u64 ret = __tdx_hypercall(&args);

	WARN_ONCE(ret, "Failed to emulate MSR write: %lld\n", ret);
}

static void hv_tdx_msr_read(u64 msr, u64 *val)
{
	struct tdx_module_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = EXIT_REASON_MSR_READ,
		.r12 = msr,
	};

	u64 ret = __tdx_hypercall(&args);

	if (WARN_ONCE(ret, "Failed to emulate MSR read: %lld\n", ret))
		*val = 0;
	else
		*val = args.r11;
}

u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
{
	struct tdx_module_args args = { };

	args.r10 = control;
	args.rdx = param1;
	args.r8 = param2;

	(void)__tdx_hypercall(&args);

	return args.r11;
}

#else
static inline void hv_tdx_msr_write(u64 msr, u64 value) {}
static inline void hv_tdx_msr_read(u64 msr, u64 *value) {}
u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) { return U64_MAX; }
#endif /* CONFIG_INTEL_TDX_GUEST */

#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
void hv_ivm_msr_write(u64 msr, u64 value)
{
	if (!ms_hyperv.paravisor_present)
		return;

	if (hv_isolation_type_tdx())
		hv_tdx_msr_write(msr, value);
	else if (hv_isolation_type_snp())
		hv_ghcb_msr_write(msr, value);
}

void hv_ivm_msr_read(u64 msr, u64 *value)
{
	if (!ms_hyperv.paravisor_present)
		return;

	if (hv_isolation_type_tdx())
		hv_tdx_msr_read(msr, value);
	else if (hv_isolation_type_snp())
		hv_ghcb_msr_read(msr, value);
}

/*
 * Keep track of the PFN regions which were shared with the host. The access
 * must be revoked upon kexec/kdump (see hv_ivm_clear_host_access()).
 */
struct hv_enc_pfn_region {
	struct list_head list;
	u64 pfn;
	int count;
};

static LIST_HEAD(hv_list_enc);
static DEFINE_RAW_SPINLOCK(hv_list_enc_lock);

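/*
 * Track the given PFNs as shared with the host: a PFN that is already
 * covered by or adjacent to an existing region reuses or grows that region;
 * otherwise a new single-page region is allocated.
 */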
static int hv_list_enc_add(const u64 *pfn_list, int count)
{
	struct hv_enc_pfn_region *ent;
	unsigned long flags;
	u64 pfn;
	int i;

	for (i = 0; i < count; i++) {
		pfn = pfn_list[i];

		raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
		/* Check if the PFN already exists in some region first */
		list_for_each_entry(ent, &hv_list_enc, list) {
			if ((ent->pfn <= pfn) && (ent->pfn + ent->count - 1 >= pfn))
				/* Nothing to do - pfn is already in the list */
				goto unlock_done;
		}

		/*
		 * Check if the PFN is adjacent to an existing region. Growing
		 * a region can make it adjacent to another one but merging is
		 * not (yet) implemented for simplicity. A PFN cannot be added
		 * to two regions to keep the logic in hv_list_enc_remove()
		 * correct.
		 */
		list_for_each_entry(ent, &hv_list_enc, list) {
			if (ent->pfn + ent->count == pfn) {
				/* Grow existing region up */
				ent->count++;
				goto unlock_done;
			} else if (pfn + 1 == ent->pfn) {
				/* Grow existing region down */
				ent->pfn--;
				ent->count++;
				goto unlock_done;
			}
		}
		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);

		/* No adjacent region found -- create a new one */
		ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
		if (!ent)
			return -ENOMEM;

		ent->pfn = pfn;
		ent->count = 1;

		raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
		list_add(&ent->list, &hv_list_enc);

unlock_done:
		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
	}

	return 0;
}

static int hv_list_enc_remove(const u64 *pfn_list, int count)
{
	struct hv_enc_pfn_region *ent, *t;
	struct hv_enc_pfn_region new_region;
	unsigned long flags;
	u64 pfn;
	int i;

	for (i = 0; i < count; i++) {
		pfn = pfn_list[i];

		raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
		list_for_each_entry_safe(ent, t, &hv_list_enc, list) {
			if (pfn == ent->pfn + ent->count - 1) {
				/* Removing tail pfn */
				ent->count--;
				if (!ent->count) {
					list_del(&ent->list);
					kfree(ent);
				}
				goto unlock_done;
			} else if (pfn == ent->pfn) {
				/* Removing head pfn */
				ent->count--;
				ent->pfn++;
				if (!ent->count) {
					list_del(&ent->list);
					kfree(ent);
				}
				goto unlock_done;
			} else if (pfn > ent->pfn && pfn < ent->pfn + ent->count - 1) {
				/*
				 * Removing a pfn in the middle. Cut off the tail
				 * of the existing region and create a template for
				 * the new one.
				 */
				new_region.pfn = pfn + 1;
				new_region.count = ent->count - (pfn - ent->pfn + 1);
				ent->count = pfn - ent->pfn;
				goto unlock_split;
			}
		}
unlock_done:
		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
		continue;

unlock_split:
		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);

		ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
		if (!ent)
			return -ENOMEM;

		ent->pfn = new_region.pfn;
		ent->count = new_region.count;

		raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
		list_add(&ent->list, &hv_list_enc);
		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
	}

	return 0;
}

/* Stop new private<->shared conversions */
static void hv_vtom_kexec_begin(void)
{
	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	/*
	 * The crash kernel reaches here with interrupts disabled: it can't
	 * wait for conversions to finish.
	 *
	 * If a race happened, just report it and proceed.
	 */
	if (!set_memory_enc_stop_conversion())
		pr_warn("Failed to stop shared<->private conversions\n");
}

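/*
 * Revoke host access to every PFN region that is still tracked as shared so
 * that the kexec'd kernel starts with no guest memory visible to the host.
 */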
static void hv_vtom_kexec_finish(void)
{
	struct hv_gpa_range_for_visibility *input;
	struct hv_enc_pfn_region *ent;
	unsigned long flags;
	u64 hv_status;
	int cur, i;

	local_irq_save(flags);
	input = *this_cpu_ptr(hyperv_pcpu_input_arg);

	if (unlikely(!input))
		goto out;

	list_for_each_entry(ent, &hv_list_enc, list) {
		for (i = 0, cur = 0; i < ent->count; i++) {
			input->gpa_page_list[cur] = ent->pfn + i;
			cur++;

			if (cur == HV_MAX_MODIFY_GPA_REP_COUNT || i == ent->count - 1) {
				input->partition_id = HV_PARTITION_ID_SELF;
				input->host_visibility = VMBUS_PAGE_NOT_VISIBLE;
				input->reserved0 = 0;
				input->reserved1 = 0;
				hv_status = hv_do_rep_hypercall(
					HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY,
					cur, 0, input, NULL);
				WARN_ON_ONCE(!hv_result_success(hv_status));
				cur = 0;
			}
		}
	}

out:
	local_irq_restore(flags);
}

/*
 * hv_mark_gpa_visibility - Set pages visible to host via hvcall.
 *
 * In an Isolation VM, all guest memory is encrypted from the host, and the
 * guest must make memory visible to the host via hypercall before sharing
 * memory with the host.
 */
static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
				  enum hv_mem_host_visibility visibility)
{
	struct hv_gpa_range_for_visibility *input;
	u64 hv_status;
	unsigned long flags;
	int ret;

	/* no-op if partition isolation is not enabled */
	if (!hv_is_isolation_supported())
		return 0;

	if (count > HV_MAX_MODIFY_GPA_REP_COUNT) {
		pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count,
		       HV_MAX_MODIFY_GPA_REP_COUNT);
		return -EINVAL;
	}

	if (visibility == VMBUS_PAGE_NOT_VISIBLE)
		ret = hv_list_enc_remove(pfn, count);
	else
		ret = hv_list_enc_add(pfn, count);
	if (ret)
		return ret;

	local_irq_save(flags);
	input = *this_cpu_ptr(hyperv_pcpu_input_arg);

	if (unlikely(!input)) {
		local_irq_restore(flags);
		return -EINVAL;
	}

	input->partition_id = HV_PARTITION_ID_SELF;
	input->host_visibility = visibility;
	input->reserved0 = 0;
	input->reserved1 = 0;
	memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
	hv_status = hv_do_rep_hypercall(
			HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
			0, input, NULL);
	local_irq_restore(flags);

	if (hv_result_success(hv_status))
		return 0;

	/* The hypercall failed: roll back the tracking update done above */
	if (visibility == VMBUS_PAGE_NOT_VISIBLE)
		ret = hv_list_enc_add(pfn, count);
	else
		ret = hv_list_enc_remove(pfn, count);
	/*
	 * There's no good way to recover from -ENOMEM here, the accounting is
	 * wrong either way.
	 */
	WARN_ON_ONCE(ret);

	return -EFAULT;
}

/*
 * When transitioning memory between encrypted and decrypted, the caller
 * of set_memory_encrypted() or set_memory_decrypted() is responsible for
 * ensuring that the memory isn't in use and isn't referenced while the
 * transition is in progress. The transition has multiple steps, and the
 * memory is in an inconsistent state until all steps are complete. A
 * reference while the state is inconsistent could result in an exception
 * that can't be cleanly fixed up.
 *
 * But the Linux kernel load_unaligned_zeropad() mechanism could cause a
 * stray reference that can't be prevented by the caller, so Linux has
 * specific code to handle this case. But when the #VC and #VE exceptions
 * are routed to a paravisor, that specific code doesn't work. To avoid this
 * problem, mark the pages as "not present" while the transition is in
 * progress. If load_unaligned_zeropad() causes a stray reference, a normal
 * page fault is generated instead of #VC or #VE, and the page-fault-based
 * handlers for load_unaligned_zeropad() resolve the reference. When the
 * transition is complete, hv_vtom_set_host_visibility() marks the pages
 * as "present" again.
 */
static int hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
{
	return set_memory_np(kbuffer, pagecount);
}

/*
 * hv_vtom_set_host_visibility - Set specified memory visible to host.
 *
 * In an Isolation VM, all guest memory is encrypted from the host, and the
 * guest must make memory visible to the host via hypercall before sharing
 * memory with the host. This function is a wrapper around
 * hv_mark_gpa_visibility() that takes a memory base and size.
 */
static int hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc)
{
	enum hv_mem_host_visibility visibility = enc ?
			VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE;
	u64 *pfn_array;
	phys_addr_t paddr;
	int i, pfn, err;
	void *vaddr;
	int ret = 0;

	pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
	if (!pfn_array) {
		ret = -ENOMEM;
		goto err_set_memory_p;
	}

	for (i = 0, pfn = 0; i < pagecount; i++) {
		/*
		 * Use slow_virt_to_phys() because the PRESENT bit has been
		 * temporarily cleared in the PTEs. slow_virt_to_phys() works
		 * without the PRESENT bit while virt_to_hvpfn() or similar
		 * does not.
		 */
		vaddr = (void *)kbuffer + (i * HV_HYP_PAGE_SIZE);
		paddr = slow_virt_to_phys(vaddr);
		pfn_array[pfn] = paddr >> HV_HYP_PAGE_SHIFT;
		pfn++;

		if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
			ret = hv_mark_gpa_visibility(pfn, pfn_array,
						     visibility);
			if (ret)
				goto err_free_pfn_array;
			pfn = 0;
		}
	}

err_free_pfn_array:
	kfree(pfn_array);

err_set_memory_p:
	/*
	 * Set the PTE PRESENT bits again to revert what hv_vtom_clear_present()
	 * did. Do this even if there is an error earlier in this function in
	 * order to avoid leaving the memory range in a "broken" state. Setting
	 * the PRESENT bits shouldn't fail, but return an error if it does.
	 */
	err = set_memory_p(kbuffer, pagecount);
	if (err && !ret)
		ret = err;

	return ret;
}

static bool hv_vtom_tlb_flush_required(bool private)
{
	/*
	 * Since hv_vtom_clear_present() marks the PTEs as "not present"
	 * and flushes the TLB, they can't be in the TLB. That makes the
	 * flush controlled by this function redundant, so return "false".
	 */
	return false;
}

static bool hv_vtom_cache_flush_required(void)
{
	return false;
}

static bool hv_is_private_mmio(u64 addr)
{
	/*
	 * Hyper-V always provides a single IO-APIC in a guest VM.
	 * When a paravisor is used, it is emulated by the paravisor
	 * in the guest context and must be mapped private.
	 */
	if (addr >= HV_IOAPIC_BASE_ADDRESS &&
	    addr < (HV_IOAPIC_BASE_ADDRESS + PAGE_SIZE))
		return true;

	/* Same with a vTPM */
	if (addr >= VTPM_BASE_ADDRESS &&
	    addr < (VTPM_BASE_ADDRESS + PAGE_SIZE))
		return true;

	return false;
}

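/*
 * Configure the confidential-computing vendor, the shared GPA boundary mask,
 * and the memory encryption status-change callbacks for this Isolation VM.
 */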
void __init hv_vtom_init(void)
{
	enum hv_isolation_type type = hv_get_isolation_type();

	switch (type) {
	case HV_ISOLATION_TYPE_VBS:
		fallthrough;
	/*
	 * By design, a VM using vTOM doesn't see the SEV setting,
	 * so SEV initialization is bypassed and sev_status isn't set.
	 * Set it here to indicate a vTOM VM.
	 *
	 * Note: if CONFIG_AMD_MEM_ENCRYPT is not set, sev_status is
	 * defined as 0ULL, to which we can't assign a value.
	 */
#ifdef CONFIG_AMD_MEM_ENCRYPT
	case HV_ISOLATION_TYPE_SNP:
		sev_status = MSR_AMD64_SNP_VTOM;
		cc_vendor = CC_VENDOR_AMD;
		break;
#endif

	case HV_ISOLATION_TYPE_TDX:
		cc_vendor = CC_VENDOR_INTEL;
		break;

	default:
		panic("hv_vtom_init: unsupported isolation type %d\n", type);
	}

	cc_set_mask(ms_hyperv.shared_gpa_boundary);
	physical_mask &= ms_hyperv.shared_gpa_boundary - 1;

	x86_platform.hyper.is_private_mmio = hv_is_private_mmio;
	x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required;
	x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
	x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
	x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
	x86_platform.guest.enc_kexec_begin = hv_vtom_kexec_begin;
	x86_platform.guest.enc_kexec_finish = hv_vtom_kexec_finish;

	/* Set WB as the default cache mode. */
	guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
}

#endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */

enum hv_isolation_type hv_get_isolation_type(void)
{
	if (!(ms_hyperv.priv_high & HV_ISOLATION))
		return HV_ISOLATION_TYPE_NONE;
	return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
}
EXPORT_SYMBOL_GPL(hv_get_isolation_type);

/*
 * hv_is_isolation_supported - Check if the system runs in a Hyper-V
 * isolation VM.
 */
bool hv_is_isolation_supported(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
		return false;

	if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
		return false;

	return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
}

DEFINE_STATIC_KEY_FALSE(isolation_type_snp);

/*
 * hv_isolation_type_snp - Check if the system runs in an AMD SEV-SNP based
 * isolation VM.
 */
bool hv_isolation_type_snp(void)
{
	return static_branch_unlikely(&isolation_type_snp);
}

DEFINE_STATIC_KEY_FALSE(isolation_type_tdx);

/*
 * hv_isolation_type_tdx - Check if the system runs in an Intel TDX based
 * isolation VM.
 */
bool hv_isolation_type_tdx(void)
{
	return static_branch_unlikely(&isolation_type_tdx);
}