// SPDX-License-Identifier: GPL-2.0
/*
 * Hyper-V Isolation VM interface with paravisor and hypervisor
 *
 * Author:
 *  Tianyu Lan <Tianyu.Lan@microsoft.com>
 */

#include <linux/bitfield.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/export.h>
#include <asm/svm.h>
#include <asm/sev.h>
#include <asm/io.h>
#include <asm/coco.h>
#include <asm/mem_encrypt.h>
#include <asm/set_memory.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/mtrr.h>
#include <asm/io_apic.h>
#include <asm/realmode.h>
#include <asm/e820/api.h>
#include <asm/desc.h>
#include <asm/msr.h>
#include <uapi/asm/vmx.h>

#ifdef CONFIG_AMD_MEM_ENCRYPT

/* ghcb_usage value marking the GHCB page as carrying a Hyper-V hypercall */
#define GHCB_USAGE_HYPERV_CALL	1

/*
 * One-page overlay of the GHCB: either the architectural struct ghcb
 * (for MSR read/write exits) or the Hyper-V hypercall layout (input
 * data, output GPA and the hypercall input/output control words).
 * Must be exactly HV_HYP_PAGE_SIZE -- see the BUILD_BUG_ON() in
 * hv_ghcb_msr_read().
 */
union hv_ghcb {
	struct ghcb ghcb;
	struct {
		u64 hypercalldata[509];
		u64 outputgpa;
		union {
			union {
				struct {
					u32 callcode        : 16;
					u32 isfast          : 1;
					u32 reserved1       : 14;
					u32 isnested        : 1;
					u32 countofelements : 12;
					u32 reserved2       : 4;
					u32 repstartindex   : 12;
					u32 reserved3       : 4;
				};
				u64 asuint64;
			} hypercallinput;
			union {
				struct {
					u16 callstatus;
					u16 reserved1;
					u32 elementsprocessed : 12;
					u32 reserved2         : 20;
				};
				/* NOTE(review): "asunit64" looks like a typo for "asuint64"; unused, kept as-is */
				u64 asunit64;
			} hypercalloutput;
		};
		u64 reserved2;
	} hypercall;
} __packed __aligned(HV_HYP_PAGE_SIZE);

/* Negotiated GHCB protocol version. Only used in an SNP VM with the paravisor */
static u16 hv_ghcb_version __ro_after_init;

/* Functions only used in an SNP VM with the paravisor go here.
*/ 71 u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size) 72 { 73 union hv_ghcb *hv_ghcb; 74 void **ghcb_base; 75 unsigned long flags; 76 u64 status; 77 78 if (!hv_ghcb_pg) 79 return -EFAULT; 80 81 WARN_ON(in_nmi()); 82 83 local_irq_save(flags); 84 ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg); 85 hv_ghcb = (union hv_ghcb *)*ghcb_base; 86 if (!hv_ghcb) { 87 local_irq_restore(flags); 88 return -EFAULT; 89 } 90 91 hv_ghcb->ghcb.protocol_version = GHCB_PROTOCOL_MAX; 92 hv_ghcb->ghcb.ghcb_usage = GHCB_USAGE_HYPERV_CALL; 93 94 hv_ghcb->hypercall.outputgpa = (u64)output; 95 hv_ghcb->hypercall.hypercallinput.asuint64 = 0; 96 hv_ghcb->hypercall.hypercallinput.callcode = control; 97 98 if (input_size) 99 memcpy(hv_ghcb->hypercall.hypercalldata, input, input_size); 100 101 VMGEXIT(); 102 103 hv_ghcb->ghcb.ghcb_usage = 0xffffffff; 104 memset(hv_ghcb->ghcb.save.valid_bitmap, 0, 105 sizeof(hv_ghcb->ghcb.save.valid_bitmap)); 106 107 status = hv_ghcb->hypercall.hypercalloutput.callstatus; 108 109 local_irq_restore(flags); 110 111 return status; 112 } 113 114 static inline u64 rd_ghcb_msr(void) 115 { 116 return native_rdmsrq(MSR_AMD64_SEV_ES_GHCB); 117 } 118 119 static inline void wr_ghcb_msr(u64 val) 120 { 121 native_wrmsrq(MSR_AMD64_SEV_ES_GHCB, val); 122 } 123 124 static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code, 125 u64 exit_info_1, u64 exit_info_2) 126 { 127 /* Fill in protocol and format specifiers */ 128 ghcb->protocol_version = hv_ghcb_version; 129 ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; 130 131 ghcb_set_sw_exit_code(ghcb, exit_code); 132 ghcb_set_sw_exit_info_1(ghcb, exit_info_1); 133 ghcb_set_sw_exit_info_2(ghcb, exit_info_2); 134 135 VMGEXIT(); 136 137 if (ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0)) 138 return ES_VMM_ERROR; 139 else 140 return ES_OK; 141 } 142 143 void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason) 144 { 145 u64 val = GHCB_MSR_TERM_REQ; 146 147 /* Tell the hypervisor what went 
wrong. */ 148 val |= GHCB_SEV_TERM_REASON(set, reason); 149 150 /* Request Guest Termination from Hypervisor */ 151 wr_ghcb_msr(val); 152 VMGEXIT(); 153 154 while (true) 155 asm volatile("hlt\n" : : : "memory"); 156 } 157 158 bool hv_ghcb_negotiate_protocol(void) 159 { 160 u64 ghcb_gpa; 161 u64 val; 162 163 /* Save ghcb page gpa. */ 164 ghcb_gpa = rd_ghcb_msr(); 165 166 /* Do the GHCB protocol version negotiation */ 167 wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ); 168 VMGEXIT(); 169 val = rd_ghcb_msr(); 170 171 if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) 172 return false; 173 174 if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN || 175 GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX) 176 return false; 177 178 hv_ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), 179 GHCB_PROTOCOL_MAX); 180 181 /* Write ghcb page back after negotiating protocol. */ 182 wr_ghcb_msr(ghcb_gpa); 183 VMGEXIT(); 184 185 return true; 186 } 187 188 static void hv_ghcb_msr_write(u64 msr, u64 value) 189 { 190 union hv_ghcb *hv_ghcb; 191 void **ghcb_base; 192 unsigned long flags; 193 194 if (!hv_ghcb_pg) 195 return; 196 197 WARN_ON(in_nmi()); 198 199 local_irq_save(flags); 200 ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg); 201 hv_ghcb = (union hv_ghcb *)*ghcb_base; 202 if (!hv_ghcb) { 203 local_irq_restore(flags); 204 return; 205 } 206 207 ghcb_set_rcx(&hv_ghcb->ghcb, msr); 208 ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value)); 209 ghcb_set_rdx(&hv_ghcb->ghcb, upper_32_bits(value)); 210 211 if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 1, 0)) 212 pr_warn("Fail to write msr via ghcb %llx.\n", msr); 213 214 local_irq_restore(flags); 215 } 216 217 static void hv_ghcb_msr_read(u64 msr, u64 *value) 218 { 219 union hv_ghcb *hv_ghcb; 220 void **ghcb_base; 221 unsigned long flags; 222 223 /* Check size of union hv_ghcb here. 
*/ 224 BUILD_BUG_ON(sizeof(union hv_ghcb) != HV_HYP_PAGE_SIZE); 225 226 if (!hv_ghcb_pg) 227 return; 228 229 WARN_ON(in_nmi()); 230 231 local_irq_save(flags); 232 ghcb_base = (void **)this_cpu_ptr(hv_ghcb_pg); 233 hv_ghcb = (union hv_ghcb *)*ghcb_base; 234 if (!hv_ghcb) { 235 local_irq_restore(flags); 236 return; 237 } 238 239 ghcb_set_rcx(&hv_ghcb->ghcb, msr); 240 if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 0, 0)) 241 pr_warn("Fail to read msr via ghcb %llx.\n", msr); 242 else 243 *value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax) 244 | ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32); 245 local_irq_restore(flags); 246 } 247 248 /* Only used in a fully enlightened SNP VM, i.e. without the paravisor */ 249 static u8 ap_start_input_arg[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE); 250 static u8 ap_start_stack[PAGE_SIZE] __aligned(PAGE_SIZE); 251 static DEFINE_PER_CPU(struct sev_es_save_area *, hv_sev_vmsa); 252 253 /* Functions only used in an SNP VM without the paravisor go here. */ 254 255 #define hv_populate_vmcb_seg(seg, gdtr_base) \ 256 do { \ 257 if (seg.selector) { \ 258 seg.base = 0; \ 259 seg.limit = HV_AP_SEGMENT_LIMIT; \ 260 seg.attrib = *(u16 *)(gdtr_base + seg.selector + 5); \ 261 seg.attrib = (seg.attrib & 0xFF) | ((seg.attrib >> 4) & 0xF00); \ 262 } \ 263 } while (0) \ 264 265 static int snp_set_vmsa(void *va, bool vmsa) 266 { 267 u64 attrs; 268 269 /* 270 * Running at VMPL0 allows the kernel to change the VMSA bit for a page 271 * using the RMPADJUST instruction. However, for the instruction to 272 * succeed it must target the permissions of a lesser privileged 273 * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST 274 * instruction in the AMD64 APM Volume 3). 
275 */ 276 attrs = 1; 277 if (vmsa) 278 attrs |= RMPADJUST_VMSA_PAGE_BIT; 279 280 return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); 281 } 282 283 static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) 284 { 285 int err; 286 287 err = snp_set_vmsa(vmsa, false); 288 if (err) 289 pr_err("clear VMSA page failed (%u), leaking page\n", err); 290 else 291 free_page((unsigned long)vmsa); 292 } 293 294 int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu) 295 { 296 struct sev_es_save_area *vmsa = (struct sev_es_save_area *) 297 __get_free_page(GFP_KERNEL | __GFP_ZERO); 298 struct sev_es_save_area *cur_vmsa; 299 struct desc_ptr gdtr; 300 u64 ret, retry = 5; 301 struct hv_enable_vp_vtl *start_vp_input; 302 unsigned long flags; 303 int vp_index; 304 305 if (!vmsa) 306 return -ENOMEM; 307 308 /* Find the Hyper-V VP index which might be not the same as APIC ID */ 309 vp_index = hv_apicid_to_vp_index(apic_id); 310 if (vp_index < 0 || vp_index > ms_hyperv.max_vp_index) 311 return -EINVAL; 312 313 native_store_gdt(&gdtr); 314 315 vmsa->gdtr.base = gdtr.address; 316 vmsa->gdtr.limit = gdtr.size; 317 318 asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector)); 319 hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base); 320 321 asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector)); 322 hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base); 323 324 asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector)); 325 hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base); 326 327 asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector)); 328 hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base); 329 330 vmsa->efer = native_read_msr(MSR_EFER); 331 332 vmsa->cr4 = native_read_cr4(); 333 vmsa->cr3 = __native_read_cr3(); 334 vmsa->cr0 = native_read_cr0(); 335 336 vmsa->xcr0 = 1; 337 vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT; 338 vmsa->rip = (u64)secondary_startup_64_no_verify; 339 vmsa->rsp = (u64)&ap_start_stack[PAGE_SIZE]; 340 341 /* 342 * Set the SNP-specific fields for 
this VMSA: 343 * VMPL level 344 * SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits) 345 */ 346 vmsa->vmpl = 0; 347 vmsa->sev_features = sev_status >> 2; 348 349 ret = snp_set_vmsa(vmsa, true); 350 if (ret) { 351 pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret); 352 free_page((u64)vmsa); 353 return ret; 354 } 355 356 local_irq_save(flags); 357 start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg; 358 memset(start_vp_input, 0, sizeof(*start_vp_input)); 359 start_vp_input->partition_id = -1; 360 start_vp_input->vp_index = vp_index; 361 start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl; 362 *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1; 363 364 do { 365 ret = hv_do_hypercall(HVCALL_START_VP, 366 start_vp_input, NULL); 367 } while (hv_result(ret) == HV_STATUS_TIME_OUT && retry--); 368 369 local_irq_restore(flags); 370 371 if (!hv_result_success(ret)) { 372 pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret); 373 snp_cleanup_vmsa(vmsa); 374 vmsa = NULL; 375 } 376 377 cur_vmsa = per_cpu(hv_sev_vmsa, cpu); 378 /* Free up any previous VMSA page */ 379 if (cur_vmsa) 380 snp_cleanup_vmsa(cur_vmsa); 381 382 /* Record the current VMSA page */ 383 per_cpu(hv_sev_vmsa, cpu) = vmsa; 384 385 return ret; 386 } 387 388 #else 389 static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} 390 static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} 391 #endif /* CONFIG_AMD_MEM_ENCRYPT */ 392 393 #ifdef CONFIG_INTEL_TDX_GUEST 394 static void hv_tdx_msr_write(u64 msr, u64 val) 395 { 396 struct tdx_module_args args = { 397 .r10 = TDX_HYPERCALL_STANDARD, 398 .r11 = EXIT_REASON_MSR_WRITE, 399 .r12 = msr, 400 .r13 = val, 401 }; 402 403 u64 ret = __tdx_hypercall(&args); 404 405 WARN_ONCE(ret, "Failed to emulate MSR write: %lld\n", ret); 406 } 407 408 static void hv_tdx_msr_read(u64 msr, u64 *val) 409 { 410 struct tdx_module_args args = { 411 .r10 = TDX_HYPERCALL_STANDARD, 412 .r11 = EXIT_REASON_MSR_READ, 413 .r12 = msr, 414 }; 415 416 u64 
ret = __tdx_hypercall(&args); 417 418 if (WARN_ONCE(ret, "Failed to emulate MSR read: %lld\n", ret)) 419 *val = 0; 420 else 421 *val = args.r11; 422 } 423 424 u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) 425 { 426 struct tdx_module_args args = { }; 427 428 args.r10 = control; 429 args.rdx = param1; 430 args.r8 = param2; 431 432 (void)__tdx_hypercall(&args); 433 434 return args.r11; 435 } 436 437 #else 438 static inline void hv_tdx_msr_write(u64 msr, u64 value) {} 439 static inline void hv_tdx_msr_read(u64 msr, u64 *value) {} 440 #endif /* CONFIG_INTEL_TDX_GUEST */ 441 442 #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) 443 void hv_ivm_msr_write(u64 msr, u64 value) 444 { 445 if (!ms_hyperv.paravisor_present) 446 return; 447 448 if (hv_isolation_type_tdx()) 449 hv_tdx_msr_write(msr, value); 450 else if (hv_isolation_type_snp()) 451 hv_ghcb_msr_write(msr, value); 452 } 453 454 void hv_ivm_msr_read(u64 msr, u64 *value) 455 { 456 if (!ms_hyperv.paravisor_present) 457 return; 458 459 if (hv_isolation_type_tdx()) 460 hv_tdx_msr_read(msr, value); 461 else if (hv_isolation_type_snp()) 462 hv_ghcb_msr_read(msr, value); 463 } 464 465 /* 466 * Keep track of the PFN regions which were shared with the host. The access 467 * must be revoked upon kexec/kdump (see hv_ivm_clear_host_access()). 
 */
struct hv_enc_pfn_region {
	struct list_head list;
	u64 pfn;	/* first PFN of the contiguous region */
	int count;	/* number of PFNs in the region */
};

static LIST_HEAD(hv_list_enc);
static DEFINE_RAW_SPINLOCK(hv_list_enc_lock);

/*
 * Record @count PFNs from @pfn_list as host-shared. Each PFN is either
 * absorbed into an existing region (already contained, or adjacent) or
 * gets a new single-PFN region. The lock is dropped around the
 * GFP_KERNEL allocation, so the list may change in between.
 * Returns 0 on success or -ENOMEM.
 */
static int hv_list_enc_add(const u64 *pfn_list, int count)
{
	struct hv_enc_pfn_region *ent;
	unsigned long flags;
	u64 pfn;
	int i;

	for (i = 0; i < count; i++) {
		pfn = pfn_list[i];

		raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
		/* Check if the PFN already exists in some region first */
		list_for_each_entry(ent, &hv_list_enc, list) {
			if ((ent->pfn <= pfn) && (ent->pfn + ent->count - 1 >= pfn))
				/* Nothing to do - pfn is already in the list */
				goto unlock_done;
		}

		/*
		 * Check if the PFN is adjacent to an existing region. Growing
		 * a region can make it adjacent to another one but merging is
		 * not (yet) implemented for simplicity. A PFN cannot be added
		 * to two regions to keep the logic in hv_list_enc_remove()
		 * correct.
		 */
		list_for_each_entry(ent, &hv_list_enc, list) {
			if (ent->pfn + ent->count == pfn) {
				/* Grow existing region up */
				ent->count++;
				goto unlock_done;
			} else if (pfn + 1 == ent->pfn) {
				/* Grow existing region down */
				ent->pfn--;
				ent->count++;
				goto unlock_done;
			}
		}
		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);

		/* No adjacent region found -- create a new one */
		ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
		if (!ent)
			return -ENOMEM;

		ent->pfn = pfn;
		ent->count = 1;

		raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
		list_add(&ent->list, &hv_list_enc);

unlock_done:
		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
	}

	return 0;
}

/*
 * Remove @count PFNs from the shared-region bookkeeping. A PFN at a
 * region's head or tail shrinks it (freeing the region when empty);
 * a PFN in the middle splits the region in two, allocating the second
 * half outside the lock. Returns 0 on success or -ENOMEM.
 */
static int hv_list_enc_remove(const u64 *pfn_list, int count)
{
	struct hv_enc_pfn_region *ent, *t;
	struct hv_enc_pfn_region new_region;
	unsigned long flags;
	u64 pfn;
	int i;

	for (i = 0; i < count; i++) {
		pfn = pfn_list[i];

		raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
		list_for_each_entry_safe(ent, t, &hv_list_enc, list) {
			if (pfn == ent->pfn + ent->count - 1) {
				/* Removing tail pfn */
				ent->count--;
				if (!ent->count) {
					list_del(&ent->list);
					kfree(ent);
				}
				goto unlock_done;
			} else if (pfn == ent->pfn) {
				/* Removing head pfn */
				ent->count--;
				ent->pfn++;
				if (!ent->count) {
					list_del(&ent->list);
					kfree(ent);
				}
				goto unlock_done;
			} else if (pfn > ent->pfn && pfn < ent->pfn + ent->count - 1) {
				/*
				 * Removing a pfn in the middle. Cut off the tail
				 * of the existing region and create a template for
				 * the new one.
				 */
				new_region.pfn = pfn + 1;
				new_region.count = ent->count - (pfn - ent->pfn + 1);
				ent->count = pfn - ent->pfn;
				goto unlock_split;
			}

		}
unlock_done:
		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
		continue;

unlock_split:
		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);

		ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
		if (!ent)
			return -ENOMEM;

		ent->pfn = new_region.pfn;
		ent->count = new_region.count;

		raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
		list_add(&ent->list, &hv_list_enc);
		raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
	}

	return 0;
}

/* Stop new private<->shared conversions */
static void hv_vtom_kexec_begin(void)
{
	if (!IS_ENABLED(CONFIG_KEXEC_CORE))
		return;

	/*
	 * Crash kernel reaches here with interrupts disabled: can't wait for
	 * conversions to finish.
	 *
	 * If race happened, just report and proceed.
	 */
	if (!set_memory_enc_stop_conversion())
		pr_warn("Failed to stop shared<->private conversions\n");
}

/*
 * Revoke the host's access to every recorded shared region, batching up
 * to HV_MAX_MODIFY_GPA_REP_COUNT pages per rep hypercall.
 *
 * NOTE(review): hv_list_enc is walked without hv_list_enc_lock here --
 * presumably safe because conversions were stopped by
 * hv_vtom_kexec_begin(); confirm.
 */
static void hv_vtom_kexec_finish(void)
{
	struct hv_gpa_range_for_visibility *input;
	struct hv_enc_pfn_region *ent;
	unsigned long flags;
	u64 hv_status;
	int cur, i;

	local_irq_save(flags);
	input = *this_cpu_ptr(hyperv_pcpu_input_arg);

	if (unlikely(!input))
		goto out;

	list_for_each_entry(ent, &hv_list_enc, list) {
		for (i = 0, cur = 0; i < ent->count; i++) {
			input->gpa_page_list[cur] = ent->pfn + i;
			cur++;

			/* Flush a full batch, or the tail of the region */
			if (cur == HV_MAX_MODIFY_GPA_REP_COUNT || i == ent->count - 1) {
				input->partition_id = HV_PARTITION_ID_SELF;
				input->host_visibility = VMBUS_PAGE_NOT_VISIBLE;
				input->reserved0 = 0;
				input->reserved1 = 0;
				hv_status = hv_do_rep_hypercall(
					HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY,
					cur, 0, input, NULL);
				WARN_ON_ONCE(!hv_result_success(hv_status));
				cur = 0;
			}
		}

	}

out:
	local_irq_restore(flags);
}

/*
 * hv_mark_gpa_visibility - Set pages visible to host via hvcall.
 *
 * In Isolation VM, all guest memory is encrypted from host and guest
 * needs to set memory visible to host via hvcall before sharing memory
 * with host.
 */
static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
				  enum hv_mem_host_visibility visibility)
{
	struct hv_gpa_range_for_visibility *input;
	u64 hv_status;
	unsigned long flags;
	int ret;

	/* no-op if partition isolation is not enabled */
	if (!hv_is_isolation_supported())
		return 0;

	if (count > HV_MAX_MODIFY_GPA_REP_COUNT) {
		pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count,
		       HV_MAX_MODIFY_GPA_REP_COUNT);
		return -EINVAL;
	}

	/*
	 * Update the shared-PFN bookkeeping first, so that a later
	 * kexec/kdump can revoke host access (hv_vtom_kexec_finish()).
	 */
	if (visibility == VMBUS_PAGE_NOT_VISIBLE)
		ret = hv_list_enc_remove(pfn, count);
	else
		ret = hv_list_enc_add(pfn, count);
	if (ret)
		return ret;

	local_irq_save(flags);
	input = *this_cpu_ptr(hyperv_pcpu_input_arg);

	if (unlikely(!input)) {
		local_irq_restore(flags);
		return -EINVAL;
	}

	input->partition_id = HV_PARTITION_ID_SELF;
	input->host_visibility = visibility;
	input->reserved0 = 0;
	input->reserved1 = 0;
	memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
	hv_status = hv_do_rep_hypercall(
			HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
			0, input, NULL);
	local_irq_restore(flags);

	if (hv_result_success(hv_status))
		return 0;

	/* Hypercall failed: roll back the bookkeeping change made above */
	if (visibility == VMBUS_PAGE_NOT_VISIBLE)
		ret = hv_list_enc_add(pfn, count);
	else
		ret = hv_list_enc_remove(pfn, count);
	/*
	 * There's no good way to recover from -ENOMEM here, the accounting is
	 * wrong either way.
	 */
	WARN_ON_ONCE(ret);

	return -EFAULT;
}

/*
 * When transitioning memory between encrypted and decrypted, the caller
 * of set_memory_encrypted() or set_memory_decrypted() is responsible for
 * ensuring that the memory isn't in use and isn't referenced while the
 * transition is in progress. The transition has multiple steps, and the
 * memory is in an inconsistent state until all steps are complete. A
 * reference while the state is inconsistent could result in an exception
 * that can't be cleanly fixed up.
 *
 * But the Linux kernel load_unaligned_zeropad() mechanism could cause a
 * stray reference that can't be prevented by the caller, so Linux has
 * specific code to handle this case. But when the #VC and #VE exceptions
 * are routed to a paravisor, the specific code doesn't work. To avoid this
 * problem, mark the pages as "not present" while the transition is in
 * progress. If load_unaligned_zeropad() causes a stray reference, a normal
 * page fault is generated instead of #VC or #VE, and the page-fault-based
 * handlers for load_unaligned_zeropad() resolve the reference. When the
 * transition is complete, hv_vtom_set_host_visibility() marks the pages
 * as "present" again.
 */
static int hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
{
	return set_memory_np(kbuffer, pagecount);
}

/*
 * hv_vtom_set_host_visibility - Set specified memory visible to host.
 *
 * In Isolation VM, all guest memory is encrypted from host and guest
 * needs to set memory visible to host via hvcall before sharing memory
 * with host. This function works as wrap of hv_mark_gpa_visibility()
 * with memory base and size.
 */
static int hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc)
{
	enum hv_mem_host_visibility visibility = enc ?
			VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE;
	u64 *pfn_array;
	phys_addr_t paddr;
	int i, pfn, err;
	void *vaddr;
	int ret = 0;

	pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
	if (!pfn_array) {
		ret = -ENOMEM;
		goto err_set_memory_p;
	}

	/* Batch PFNs and flush each full batch (or the tail) via hvcall */
	for (i = 0, pfn = 0; i < pagecount; i++) {
		/*
		 * Use slow_virt_to_phys() because the PRESENT bit has been
		 * temporarily cleared in the PTEs. slow_virt_to_phys() works
		 * without the PRESENT bit while virt_to_hvpfn() or similar
		 * does not.
		 */
		vaddr = (void *)kbuffer + (i * HV_HYP_PAGE_SIZE);
		paddr = slow_virt_to_phys(vaddr);
		pfn_array[pfn] = paddr >> HV_HYP_PAGE_SHIFT;
		pfn++;

		if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
			ret = hv_mark_gpa_visibility(pfn, pfn_array,
						     visibility);
			if (ret)
				goto err_free_pfn_array;
			pfn = 0;
		}
	}

err_free_pfn_array:
	kfree(pfn_array);

err_set_memory_p:
	/*
	 * Set the PTE PRESENT bits again to revert what hv_vtom_clear_present()
	 * did. Do this even if there is an error earlier in this function in
	 * order to avoid leaving the memory range in a "broken" state. Setting
	 * the PRESENT bits shouldn't fail, but return an error if it does.
	 */
	err = set_memory_p(kbuffer, pagecount);
	if (err && !ret)
		ret = err;

	return ret;
}

static bool hv_vtom_tlb_flush_required(bool private)
{
	/*
	 * Since hv_vtom_clear_present() marks the PTEs as "not present"
	 * and flushes the TLB, they can't be in the TLB. That makes the
	 * flush controlled by this function redundant, so return "false".
	 */
	return false;
}

static bool hv_vtom_cache_flush_required(void)
{
	/* No cache flush is needed around visibility changes in vTOM VMs */
	return false;
}

/* Return true for MMIO ranges the paravisor emulates (must map private) */
static bool hv_is_private_mmio(u64 addr)
{
	/*
	 * Hyper-V always provides a single IO-APIC in a guest VM.
	 * When a paravisor is used, it is emulated by the paravisor
	 * in the guest context and must be mapped private.
	 */
	if (addr >= HV_IOAPIC_BASE_ADDRESS &&
	    addr < (HV_IOAPIC_BASE_ADDRESS + PAGE_SIZE))
		return true;

	/* Same with a vTPM */
	if (addr >= VTPM_BASE_ADDRESS &&
	    addr < (VTPM_BASE_ADDRESS + PAGE_SIZE))
		return true;

	return false;
}

/*
 * Configure the CoCo (confidential computing) layer and the
 * x86_platform encryption-status hooks for a vTOM-based isolation VM.
 */
void __init hv_vtom_init(void)
{
	enum hv_isolation_type type = hv_get_isolation_type();

	switch (type) {
	case HV_ISOLATION_TYPE_VBS:
		fallthrough;
	/*
	 * By design, a VM using vTOM doesn't see the SEV setting,
	 * so SEV initialization is bypassed and sev_status isn't set.
	 * Set it here to indicate a vTOM VM.
	 *
	 * Note: if CONFIG_AMD_MEM_ENCRYPT is not set, sev_status is
	 * defined as 0ULL, to which we can't assign a value.
	 */
#ifdef CONFIG_AMD_MEM_ENCRYPT
	case HV_ISOLATION_TYPE_SNP:
		sev_status = MSR_AMD64_SNP_VTOM;
		cc_vendor = CC_VENDOR_AMD;
		break;
#endif

	case HV_ISOLATION_TYPE_TDX:
		cc_vendor = CC_VENDOR_INTEL;
		break;

	default:
		panic("hv_vtom_init: unsupported isolation type %d\n", type);
	}

	/* The shared GPA boundary (vTOM) bit marks decrypted addresses */
	cc_set_mask(ms_hyperv.shared_gpa_boundary);
	physical_mask &= ms_hyperv.shared_gpa_boundary - 1;

	x86_platform.hyper.is_private_mmio = hv_is_private_mmio;
	x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required;
	x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
	x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
	x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
	x86_platform.guest.enc_kexec_begin = hv_vtom_kexec_begin;
	x86_platform.guest.enc_kexec_finish = hv_vtom_kexec_finish;

	/* Set WB as the default cache mode. */
	guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);
}

#endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */

/* Return the isolation type reported by Hyper-V, or NONE */
enum hv_isolation_type hv_get_isolation_type(void)
{
	if (!(ms_hyperv.priv_high & HV_ISOLATION))
		return HV_ISOLATION_TYPE_NONE;
	return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
}
EXPORT_SYMBOL_GPL(hv_get_isolation_type);

/*
 * hv_is_isolation_supported - Check system runs in the Hyper-V
 * isolation VM.
 */
bool hv_is_isolation_supported(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
		return false;

	if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
		return false;

	return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
}

DEFINE_STATIC_KEY_FALSE(isolation_type_snp);

/*
 * hv_isolation_type_snp - Check if the system runs in an AMD SEV-SNP based
 * isolation VM.
 */
bool hv_isolation_type_snp(void)
{
	return static_branch_unlikely(&isolation_type_snp);
}

DEFINE_STATIC_KEY_FALSE(isolation_type_tdx);
/*
 * hv_isolation_type_tdx - Check if the system runs in an Intel TDX based
 * isolated VM.
 */
bool hv_isolation_type_tdx(void)
{
	return static_branch_unlikely(&isolation_type_tdx);
}