1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Kernel-based Virtual Machine driver for Linux 4 * 5 * AMD SVM-SEV support 6 * 7 * Copyright 2010 Red Hat, Inc. and/or its affiliates. 8 */ 9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 10 11 #include <linux/kvm_types.h> 12 #include <linux/kvm_host.h> 13 #include <linux/kernel.h> 14 #include <linux/highmem.h> 15 #include <linux/psp.h> 16 #include <linux/psp-sev.h> 17 #include <linux/pagemap.h> 18 #include <linux/swap.h> 19 #include <linux/misc_cgroup.h> 20 #include <linux/processor.h> 21 #include <linux/trace_events.h> 22 23 #include <asm/pkru.h> 24 #include <asm/trapnr.h> 25 #include <asm/fpu/xcr.h> 26 #include <asm/fpu/xstate.h> 27 #include <asm/debugreg.h> 28 29 #include "mmu.h" 30 #include "x86.h" 31 #include "svm.h" 32 #include "svm_ops.h" 33 #include "cpuid.h" 34 #include "trace.h" 35 36 #define GHCB_VERSION_MAX 2ULL 37 #define GHCB_VERSION_DEFAULT 2ULL 38 #define GHCB_VERSION_MIN 1ULL 39 40 #define GHCB_HV_FT_SUPPORTED GHCB_HV_FT_SNP 41 42 /* enable/disable SEV support */ 43 static bool sev_enabled = true; 44 module_param_named(sev, sev_enabled, bool, 0444); 45 46 /* enable/disable SEV-ES support */ 47 static bool sev_es_enabled = true; 48 module_param_named(sev_es, sev_es_enabled, bool, 0444); 49 50 /* enable/disable SEV-ES DebugSwap support */ 51 static bool sev_es_debug_swap_enabled = true; 52 module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444); 53 static u64 sev_supported_vmsa_features; 54 55 #define AP_RESET_HOLD_NONE 0 56 #define AP_RESET_HOLD_NAE_EVENT 1 57 #define AP_RESET_HOLD_MSR_PROTO 2 58 59 static u8 sev_enc_bit; 60 static DECLARE_RWSEM(sev_deactivate_lock); 61 static DEFINE_MUTEX(sev_bitmap_lock); 62 unsigned int max_sev_asid; 63 static unsigned int min_sev_asid; 64 static unsigned long sev_me_mask; 65 static unsigned int nr_asids; 66 static unsigned long *sev_asid_bitmap; 67 static unsigned long *sev_reclaim_asid_bitmap; 68 69 struct enc_region { 70 struct list_head list; 71 unsigned long npages; 72 struct page **pages; 73 unsigned long uaddr; 74 unsigned long size; 75 }; 76 77 /* Called with the sev_bitmap_lock held, or on shutdown */ 78 static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) 79 { 80 int ret, error = 0; 81 unsigned int asid; 82 83 /* Check if there are any ASIDs to reclaim before performing a flush */ 84 asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); 85 if (asid > max_asid) 86 return -EBUSY; 87 88 /* 89 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail, 90 * so it must be guarded. 91 */ 92 down_write(&sev_deactivate_lock); 93 94 wbinvd_on_all_cpus(); 95 ret = sev_guest_df_flush(&error); 96 97 up_write(&sev_deactivate_lock); 98 99 if (ret) 100 pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error); 101 102 return ret; 103 } 104 105 static inline bool is_mirroring_enc_context(struct kvm *kvm) 106 { 107 return !!to_kvm_sev_info(kvm)->enc_context_owner; 108 } 109 110 static bool sev_vcpu_has_debug_swap(struct vcpu_svm *svm) 111 { 112 struct kvm_vcpu *vcpu = &svm->vcpu; 113 struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; 114 115 return sev->vmsa_features & SVM_SEV_FEAT_DEBUG_SWAP; 116 } 117 118 /* Must be called with the sev_bitmap_lock held */ 119 static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid) 120 { 121 if (sev_flush_asids(min_asid, max_asid)) 122 return false; 123 124 /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ 125 bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, 126 nr_asids); 127 bitmap_zero(sev_reclaim_asid_bitmap, nr_asids); 128 129 return true; 130 } 131 132 static int sev_misc_cg_try_charge(struct kvm_sev_info *sev) 133 { 134 enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; 135 return misc_cg_try_charge(type, sev->misc_cg, 1); 136 } 137 138 static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) 139 { 140 enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; 141 misc_cg_uncharge(type, sev->misc_cg, 1); 142 } 143 144 static int sev_asid_new(struct kvm_sev_info *sev) 145 { 146 /* 147 * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. 148 * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. 149 * Note: min ASID can end up larger than the max if basic SEV support is 150 * effectively disabled by disallowing use of ASIDs for SEV guests. 151 */ 152 unsigned int min_asid = sev->es_active ? 1 : min_sev_asid; 153 unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; 154 unsigned int asid; 155 bool retry = true; 156 int ret; 157 158 if (min_asid > max_asid) 159 return -ENOTTY; 160 161 WARN_ON(sev->misc_cg); 162 sev->misc_cg = get_current_misc_cg(); 163 ret = sev_misc_cg_try_charge(sev); 164 if (ret) { 165 put_misc_cg(sev->misc_cg); 166 sev->misc_cg = NULL; 167 return ret; 168 } 169 170 mutex_lock(&sev_bitmap_lock); 171 172 again: 173 asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); 174 if (asid > max_asid) { 175 if (retry && __sev_recycle_asids(min_asid, max_asid)) { 176 retry = false; 177 goto again; 178 } 179 mutex_unlock(&sev_bitmap_lock); 180 ret = -EBUSY; 181 goto e_uncharge; 182 } 183 184 __set_bit(asid, sev_asid_bitmap); 185 186 mutex_unlock(&sev_bitmap_lock); 187 188 sev->asid = asid; 189 return 0; 190 e_uncharge: 191 sev_misc_cg_uncharge(sev); 192 put_misc_cg(sev->misc_cg); 193 sev->misc_cg = NULL; 194 return ret; 195 } 196 197 static unsigned int sev_get_asid(struct kvm *kvm) 198 { 199 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 200 201 return sev->asid; 202 } 203 204 static void sev_asid_free(struct kvm_sev_info *sev) 205 { 206 struct svm_cpu_data *sd; 207 int cpu; 208 209 mutex_lock(&sev_bitmap_lock); 210 211 __set_bit(sev->asid, sev_reclaim_asid_bitmap); 212 213 for_each_possible_cpu(cpu) { 214 sd = per_cpu_ptr(&svm_data, cpu); 215 sd->sev_vmcbs[sev->asid] = NULL; 216 } 217 218 mutex_unlock(&sev_bitmap_lock); 219 220 sev_misc_cg_uncharge(sev); 221 put_misc_cg(sev->misc_cg); 222 sev->misc_cg = NULL; 223 } 224 225 static void sev_decommission(unsigned int handle) 226 { 227 struct sev_data_decommission decommission; 228 229 if (!handle) 230 return; 231 232 decommission.handle = handle; 233 sev_guest_decommission(&decommission, NULL); 234 } 235 236 static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) 237 { 238 struct sev_data_deactivate deactivate; 239 240 if (!handle) 241 return; 242 243 deactivate.handle = handle; 244 245 /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */ 246 down_read(&sev_deactivate_lock); 247 sev_guest_deactivate(&deactivate, NULL); 248 up_read(&sev_deactivate_lock); 249 250 sev_decommission(handle); 251 } 252 253 static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, 254 struct kvm_sev_init *data, 255 unsigned long vm_type) 256 { 257 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 258 struct sev_platform_init_args init_args = {0}; 259 bool es_active = vm_type != KVM_X86_SEV_VM; 260 u64 valid_vmsa_features = es_active ? sev_supported_vmsa_features : 0; 261 int ret; 262 263 if (kvm->created_vcpus) 264 return -EINVAL; 265 266 if (data->flags) 267 return -EINVAL; 268 269 if (data->vmsa_features & ~valid_vmsa_features) 270 return -EINVAL; 271 272 if (data->ghcb_version > GHCB_VERSION_MAX || (!es_active && data->ghcb_version)) 273 return -EINVAL; 274 275 if (unlikely(sev->active)) 276 return -EINVAL; 277 278 sev->active = true; 279 sev->es_active = es_active; 280 sev->vmsa_features = data->vmsa_features; 281 sev->ghcb_version = data->ghcb_version; 282 283 /* 284 * Currently KVM supports the full range of mandatory features defined 285 * by version 2 of the GHCB protocol, so default to that for SEV-ES 286 * guests created via KVM_SEV_INIT2. 287 */ 288 if (sev->es_active && !sev->ghcb_version) 289 sev->ghcb_version = GHCB_VERSION_DEFAULT; 290 291 ret = sev_asid_new(sev); 292 if (ret) 293 goto e_no_asid; 294 295 init_args.probe = false; 296 ret = sev_platform_init(&init_args); 297 if (ret) 298 goto e_free; 299 300 INIT_LIST_HEAD(&sev->regions_list); 301 INIT_LIST_HEAD(&sev->mirror_vms); 302 sev->need_init = false; 303 304 kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_SEV); 305 306 return 0; 307 308 e_free: 309 argp->error = init_args.error; 310 sev_asid_free(sev); 311 sev->asid = 0; 312 e_no_asid: 313 sev->vmsa_features = 0; 314 sev->es_active = false; 315 sev->active = false; 316 return ret; 317 } 318 319 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) 320 { 321 struct kvm_sev_init data = { 322 .vmsa_features = 0, 323 .ghcb_version = 0, 324 }; 325 unsigned long vm_type; 326 327 if (kvm->arch.vm_type != KVM_X86_DEFAULT_VM) 328 return -EINVAL; 329 330 vm_type = (argp->id == KVM_SEV_INIT ? KVM_X86_SEV_VM : KVM_X86_SEV_ES_VM); 331 332 /* 333 * KVM_SEV_ES_INIT has been deprecated by KVM_SEV_INIT2, so it will 334 * continue to only ever support the minimal GHCB protocol version. 335 */ 336 if (vm_type == KVM_X86_SEV_ES_VM) 337 data.ghcb_version = GHCB_VERSION_MIN; 338 339 return __sev_guest_init(kvm, argp, &data, vm_type); 340 } 341 342 static int sev_guest_init2(struct kvm *kvm, struct kvm_sev_cmd *argp) 343 { 344 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 345 struct kvm_sev_init data; 346 347 if (!sev->need_init) 348 return -EINVAL; 349 350 if (kvm->arch.vm_type != KVM_X86_SEV_VM && 351 kvm->arch.vm_type != KVM_X86_SEV_ES_VM) 352 return -EINVAL; 353 354 if (copy_from_user(&data, u64_to_user_ptr(argp->data), sizeof(data))) 355 return -EFAULT; 356 357 return __sev_guest_init(kvm, argp, &data, kvm->arch.vm_type); 358 } 359 360 static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) 361 { 362 unsigned int asid = sev_get_asid(kvm); 363 struct sev_data_activate activate; 364 int ret; 365 366 /* activate ASID on the given handle */ 367 activate.handle = handle; 368 activate.asid = asid; 369 ret = sev_guest_activate(&activate, error); 370 371 return ret; 372 } 373 374 static int __sev_issue_cmd(int fd, int id, void *data, int *error) 375 { 376 struct fd f; 377 int ret; 378 379 f = fdget(fd); 380 if (!f.file) 381 return -EBADF; 382 383 ret = sev_issue_cmd_external_user(f.file, id, data, error); 384 385 fdput(f); 386 return ret; 387 } 388 389 static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error) 390 { 391 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 392 393 return __sev_issue_cmd(sev->fd, id, data, error); 394 } 395 396 static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) 397 { 398 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 399 struct sev_data_launch_start start; 400 struct kvm_sev_launch_start params; 401 void *dh_blob, *session_blob; 402 int *error = &argp->error; 403 int ret; 404 405 if (!sev_guest(kvm)) 406 return -ENOTTY; 407 408 if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) 409 return -EFAULT; 410 411 memset(&start, 0, sizeof(start)); 412 413 dh_blob = NULL; 414 if (params.dh_uaddr) { 415 dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len); 416 if (IS_ERR(dh_blob)) 417 return PTR_ERR(dh_blob); 418 419 start.dh_cert_address = __sme_set(__pa(dh_blob)); 420 start.dh_cert_len = params.dh_len; 421 } 422 423 session_blob = NULL; 424 if (params.session_uaddr) { 425 session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len); 426 if (IS_ERR(session_blob)) { 427 ret = PTR_ERR(session_blob); 428 goto e_free_dh; 429 } 430 431 start.session_address = __sme_set(__pa(session_blob)); 432 start.session_len = params.session_len; 433 } 434 435 start.handle = params.handle; 436 start.policy = params.policy; 437 438 /* create memory encryption context */ 439 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, &start, error); 440 if (ret) 441 goto e_free_session; 442 443 /* Bind ASID to this guest */ 444 ret = sev_bind_asid(kvm, start.handle, error); 445 if (ret) { 446 sev_decommission(start.handle); 447 goto e_free_session; 448 } 449 450 /* return handle to userspace */ 451 params.handle = start.handle; 452 if (copy_to_user(u64_to_user_ptr(argp->data), ¶ms, sizeof(params))) { 453 sev_unbind_asid(kvm, start.handle); 454 ret = -EFAULT; 455 goto e_free_session; 456 } 457 458 sev->handle = start.handle; 459 sev->fd = argp->sev_fd; 460 461 e_free_session: 462 kfree(session_blob); 463 e_free_dh: 464 kfree(dh_blob); 465 return ret; 466 } 467 468 static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, 469 unsigned long ulen, unsigned long *n, 470 int write) 471 { 472 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 473 unsigned long npages, size; 474 int npinned; 475 unsigned long locked, lock_limit; 476 struct page **pages; 477 unsigned long first, last; 478 int ret; 479 480 lockdep_assert_held(&kvm->lock); 481 482 if (ulen == 0 || uaddr + ulen < uaddr) 483 return ERR_PTR(-EINVAL); 484 485 /* Calculate number of pages. */ 486 first = (uaddr & PAGE_MASK) >> PAGE_SHIFT; 487 last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT; 488 npages = (last - first + 1); 489 490 locked = sev->pages_locked + npages; 491 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 492 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { 493 pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit); 494 return ERR_PTR(-ENOMEM); 495 } 496 497 if (WARN_ON_ONCE(npages > INT_MAX)) 498 return ERR_PTR(-EINVAL); 499 500 /* Avoid using vmalloc for smaller buffers. */ 501 size = npages * sizeof(struct page *); 502 if (size > PAGE_SIZE) 503 pages = __vmalloc(size, GFP_KERNEL_ACCOUNT); 504 else 505 pages = kmalloc(size, GFP_KERNEL_ACCOUNT); 506 507 if (!pages) 508 return ERR_PTR(-ENOMEM); 509 510 /* Pin the user virtual address. */ 511 npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages); 512 if (npinned != npages) { 513 pr_err("SEV: Failure locking %lu pages.\n", npages); 514 ret = -ENOMEM; 515 goto err; 516 } 517 518 *n = npages; 519 sev->pages_locked = locked; 520 521 return pages; 522 523 err: 524 if (npinned > 0) 525 unpin_user_pages(pages, npinned); 526 527 kvfree(pages); 528 return ERR_PTR(ret); 529 } 530 531 static void sev_unpin_memory(struct kvm *kvm, struct page **pages, 532 unsigned long npages) 533 { 534 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 535 536 unpin_user_pages(pages, npages); 537 kvfree(pages); 538 sev->pages_locked -= npages; 539 } 540 541 static void sev_clflush_pages(struct page *pages[], unsigned long npages) 542 { 543 uint8_t *page_virtual; 544 unsigned long i; 545 546 if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 || 547 pages == NULL) 548 return; 549 550 for (i = 0; i < npages; i++) { 551 page_virtual = kmap_local_page(pages[i]); 552 clflush_cache_range(page_virtual, PAGE_SIZE); 553 kunmap_local(page_virtual); 554 cond_resched(); 555 } 556 } 557 558 static unsigned long get_num_contig_pages(unsigned long idx, 559 struct page **inpages, unsigned long npages) 560 { 561 unsigned long paddr, next_paddr; 562 unsigned long i = idx + 1, pages = 1; 563 564 /* find the number of contiguous pages starting from idx */ 565 paddr = __sme_page_pa(inpages[idx]); 566 while (i < npages) { 567 next_paddr = __sme_page_pa(inpages[i++]); 568 if ((paddr + PAGE_SIZE) == next_paddr) { 569 pages++; 570 paddr = next_paddr; 571 continue; 572 } 573 break; 574 } 575 576 return pages; 577 } 578 579 static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) 580 { 581 unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i; 582 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 583 struct kvm_sev_launch_update_data params; 584 struct sev_data_launch_update_data data; 585 struct page **inpages; 586 int ret; 587 588 if (!sev_guest(kvm)) 589 return -ENOTTY; 590 591 if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) 592 return -EFAULT; 593 594 vaddr = params.uaddr; 595 size = params.len; 596 vaddr_end = vaddr + size; 597 598 /* Lock the user memory. */ 599 inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1); 600 if (IS_ERR(inpages)) 601 return PTR_ERR(inpages); 602 603 /* 604 * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in 605 * place; the cache may contain the data that was written unencrypted. 606 */ 607 sev_clflush_pages(inpages, npages); 608 609 data.reserved = 0; 610 data.handle = sev->handle; 611 612 for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) { 613 int offset, len; 614 615 /* 616 * If the user buffer is not page-aligned, calculate the offset 617 * within the page. 618 */ 619 offset = vaddr & (PAGE_SIZE - 1); 620 621 /* Calculate the number of pages that can be encrypted in one go. */ 622 pages = get_num_contig_pages(i, inpages, npages); 623 624 len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size); 625 626 data.len = len; 627 data.address = __sme_page_pa(inpages[i]) + offset; 628 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, &data, &argp->error); 629 if (ret) 630 goto e_unpin; 631 632 size -= len; 633 next_vaddr = vaddr + len; 634 } 635 636 e_unpin: 637 /* content of memory is updated, mark pages dirty */ 638 for (i = 0; i < npages; i++) { 639 set_page_dirty_lock(inpages[i]); 640 mark_page_accessed(inpages[i]); 641 } 642 /* unlock the user pages */ 643 sev_unpin_memory(kvm, inpages, npages); 644 return ret; 645 } 646 647 static int sev_es_sync_vmsa(struct vcpu_svm *svm) 648 { 649 struct kvm_vcpu *vcpu = &svm->vcpu; 650 struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; 651 struct sev_es_save_area *save = svm->sev_es.vmsa; 652 struct xregs_state *xsave; 653 const u8 *s; 654 u8 *d; 655 int i; 656 657 /* Check some debug related fields before encrypting the VMSA */ 658 if (svm->vcpu.guest_debug || (svm->vmcb->save.dr7 & ~DR7_FIXED_1)) 659 return -EINVAL; 660 661 /* 662 * SEV-ES will use a VMSA that is pointed to by the VMCB, not 663 * the traditional VMSA that is part of the VMCB. Copy the 664 * traditional VMSA as it has been built so far (in prep 665 * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state. 666 */ 667 memcpy(save, &svm->vmcb->save, sizeof(svm->vmcb->save)); 668 669 /* Sync registgers */ 670 save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX]; 671 save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX]; 672 save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; 673 save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX]; 674 save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP]; 675 save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP]; 676 save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI]; 677 save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI]; 678 #ifdef CONFIG_X86_64 679 save->r8 = svm->vcpu.arch.regs[VCPU_REGS_R8]; 680 save->r9 = svm->vcpu.arch.regs[VCPU_REGS_R9]; 681 save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10]; 682 save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11]; 683 save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12]; 684 save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13]; 685 save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14]; 686 save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15]; 687 #endif 688 save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP]; 689 690 /* Sync some non-GPR registers before encrypting */ 691 save->xcr0 = svm->vcpu.arch.xcr0; 692 save->pkru = svm->vcpu.arch.pkru; 693 save->xss = svm->vcpu.arch.ia32_xss; 694 save->dr6 = svm->vcpu.arch.dr6; 695 696 save->sev_features = sev->vmsa_features; 697 698 /* 699 * Skip FPU and AVX setup with KVM_SEV_ES_INIT to avoid 700 * breaking older measurements. 701 */ 702 if (vcpu->kvm->arch.vm_type != KVM_X86_DEFAULT_VM) { 703 xsave = &vcpu->arch.guest_fpu.fpstate->regs.xsave; 704 save->x87_dp = xsave->i387.rdp; 705 save->mxcsr = xsave->i387.mxcsr; 706 save->x87_ftw = xsave->i387.twd; 707 save->x87_fsw = xsave->i387.swd; 708 save->x87_fcw = xsave->i387.cwd; 709 save->x87_fop = xsave->i387.fop; 710 save->x87_ds = 0; 711 save->x87_cs = 0; 712 save->x87_rip = xsave->i387.rip; 713 714 for (i = 0; i < 8; i++) { 715 /* 716 * The format of the x87 save area is undocumented and 717 * definitely not what you would expect. It consists of 718 * an 8*8 bytes area with bytes 0-7, and an 8*2 bytes 719 * area with bytes 8-9 of each register. 720 */ 721 d = save->fpreg_x87 + i * 8; 722 s = ((u8 *)xsave->i387.st_space) + i * 16; 723 memcpy(d, s, 8); 724 save->fpreg_x87[64 + i * 2] = s[8]; 725 save->fpreg_x87[64 + i * 2 + 1] = s[9]; 726 } 727 memcpy(save->fpreg_xmm, xsave->i387.xmm_space, 256); 728 729 s = get_xsave_addr(xsave, XFEATURE_YMM); 730 if (s) 731 memcpy(save->fpreg_ymm, s, 256); 732 else 733 memset(save->fpreg_ymm, 0, 256); 734 } 735 736 pr_debug("Virtual Machine Save Area (VMSA):\n"); 737 print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false); 738 739 return 0; 740 } 741 742 static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, 743 int *error) 744 { 745 struct sev_data_launch_update_vmsa vmsa; 746 struct vcpu_svm *svm = to_svm(vcpu); 747 int ret; 748 749 if (vcpu->guest_debug) { 750 pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported"); 751 return -EINVAL; 752 } 753 754 /* Perform some pre-encryption checks against the VMSA */ 755 ret = sev_es_sync_vmsa(svm); 756 if (ret) 757 return ret; 758 759 /* 760 * The LAUNCH_UPDATE_VMSA command will perform in-place encryption of 761 * the VMSA memory content (i.e it will write the same memory region 762 * with the guest's key), so invalidate it first. 763 */ 764 clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE); 765 766 vmsa.reserved = 0; 767 vmsa.handle = to_kvm_sev_info(kvm)->handle; 768 vmsa.address = __sme_pa(svm->sev_es.vmsa); 769 vmsa.len = PAGE_SIZE; 770 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error); 771 if (ret) 772 return ret; 773 774 /* 775 * SEV-ES guests maintain an encrypted version of their FPU 776 * state which is restored and saved on VMRUN and VMEXIT. 777 * Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't 778 * do xsave/xrstor on it. 779 */ 780 fpstate_set_confidential(&vcpu->arch.guest_fpu); 781 vcpu->arch.guest_state_protected = true; 782 783 /* 784 * SEV-ES guest mandates LBR Virtualization to be _always_ ON. Enable it 785 * only after setting guest_state_protected because KVM_SET_MSRS allows 786 * dynamic toggling of LBRV (for performance reason) on write access to 787 * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set. 788 */ 789 svm_enable_lbrv(vcpu); 790 return 0; 791 } 792 793 static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) 794 { 795 struct kvm_vcpu *vcpu; 796 unsigned long i; 797 int ret; 798 799 if (!sev_es_guest(kvm)) 800 return -ENOTTY; 801 802 kvm_for_each_vcpu(i, vcpu, kvm) { 803 ret = mutex_lock_killable(&vcpu->mutex); 804 if (ret) 805 return ret; 806 807 ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error); 808 809 mutex_unlock(&vcpu->mutex); 810 if (ret) 811 return ret; 812 } 813 814 return 0; 815 } 816 817 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) 818 { 819 void __user *measure = u64_to_user_ptr(argp->data); 820 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 821 struct sev_data_launch_measure data; 822 struct kvm_sev_launch_measure params; 823 void __user *p = NULL; 824 void *blob = NULL; 825 int ret; 826 827 if (!sev_guest(kvm)) 828 return -ENOTTY; 829 830 if (copy_from_user(¶ms, measure, sizeof(params))) 831 return -EFAULT; 832 833 memset(&data, 0, sizeof(data)); 834 835 /* User wants to query the blob length */ 836 if (!params.len) 837 goto cmd; 838 839 p = u64_to_user_ptr(params.uaddr); 840 if (p) { 841 if (params.len > SEV_FW_BLOB_MAX_SIZE) 842 return -EINVAL; 843 844 blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); 845 if (!blob) 846 return -ENOMEM; 847 848 data.address = __psp_pa(blob); 849 data.len = params.len; 850 } 851 852 cmd: 853 data.handle = sev->handle; 854 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, &data, &argp->error); 855 856 /* 857 * If we query the session length, FW responded with expected data. 858 */ 859 if (!params.len) 860 goto done; 861 862 if (ret) 863 goto e_free_blob; 864 865 if (blob) { 866 if (copy_to_user(p, blob, params.len)) 867 ret = -EFAULT; 868 } 869 870 done: 871 params.len = data.len; 872 if (copy_to_user(measure, ¶ms, sizeof(params))) 873 ret = -EFAULT; 874 e_free_blob: 875 kfree(blob); 876 return ret; 877 } 878 879 static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) 880 { 881 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 882 struct sev_data_launch_finish data; 883 884 if (!sev_guest(kvm)) 885 return -ENOTTY; 886 887 data.handle = sev->handle; 888 return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, &data, &argp->error); 889 } 890 891 static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) 892 { 893 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 894 struct kvm_sev_guest_status params; 895 struct sev_data_guest_status data; 896 int ret; 897 898 if (!sev_guest(kvm)) 899 return -ENOTTY; 900 901 memset(&data, 0, sizeof(data)); 902 903 data.handle = sev->handle; 904 ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, &data, &argp->error); 905 if (ret) 906 return ret; 907 908 params.policy = data.policy; 909 params.state = data.state; 910 params.handle = data.handle; 911 912 if (copy_to_user(u64_to_user_ptr(argp->data), ¶ms, sizeof(params))) 913 ret = -EFAULT; 914 915 return ret; 916 } 917 918 static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, 919 unsigned long dst, int size, 920 int *error, bool enc) 921 { 922 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 923 struct sev_data_dbg data; 924 925 data.reserved = 0; 926 data.handle = sev->handle; 927 data.dst_addr = dst; 928 data.src_addr = src; 929 data.len = size; 930 931 return sev_issue_cmd(kvm, 932 enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT, 933 &data, error); 934 } 935 936 static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, 937 unsigned long dst_paddr, int sz, int *err) 938 { 939 int offset; 940 941 /* 942 * Its safe to read more than we are asked, caller should ensure that 943 * destination has enough space. 944 */ 945 offset = src_paddr & 15; 946 src_paddr = round_down(src_paddr, 16); 947 sz = round_up(sz + offset, 16); 948 949 return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false); 950 } 951 952 static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, 953 void __user *dst_uaddr, 954 unsigned long dst_paddr, 955 int size, int *err) 956 { 957 struct page *tpage = NULL; 958 int ret, offset; 959 960 /* if inputs are not 16-byte then use intermediate buffer */ 961 if (!IS_ALIGNED(dst_paddr, 16) || 962 !IS_ALIGNED(paddr, 16) || 963 !IS_ALIGNED(size, 16)) { 964 tpage = (void *)alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); 965 if (!tpage) 966 return -ENOMEM; 967 968 dst_paddr = __sme_page_pa(tpage); 969 } 970 971 ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err); 972 if (ret) 973 goto e_free; 974 975 if (tpage) { 976 offset = paddr & 15; 977 if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size)) 978 ret = -EFAULT; 979 } 980 981 e_free: 982 if (tpage) 983 __free_page(tpage); 984 985 return ret; 986 } 987 988 static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, 989 void __user *vaddr, 990 unsigned long dst_paddr, 991 void __user *dst_vaddr, 992 int size, int *error) 993 { 994 struct page *src_tpage = NULL; 995 struct page *dst_tpage = NULL; 996 int ret, len = size; 997 998 /* If source buffer is not aligned then use an intermediate buffer */ 999 if (!IS_ALIGNED((unsigned long)vaddr, 16)) { 1000 src_tpage = alloc_page(GFP_KERNEL_ACCOUNT); 1001 if (!src_tpage) 1002 return -ENOMEM; 1003 1004 if (copy_from_user(page_address(src_tpage), vaddr, size)) { 1005 __free_page(src_tpage); 1006 return -EFAULT; 1007 } 1008 1009 paddr = __sme_page_pa(src_tpage); 1010 } 1011 1012 /* 1013 * If destination buffer or length is not aligned then do read-modify-write: 1014 * - decrypt destination in an intermediate buffer 1015 * - copy the source buffer in an intermediate buffer 1016 * - use the intermediate buffer as source buffer 1017 */ 1018 if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { 1019 int dst_offset; 1020 1021 dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT); 1022 if (!dst_tpage) { 1023 ret = -ENOMEM; 1024 goto e_free; 1025 } 1026 1027 ret = __sev_dbg_decrypt(kvm, dst_paddr, 1028 __sme_page_pa(dst_tpage), size, error); 1029 if (ret) 1030 goto e_free; 1031 1032 /* 1033 * If source is kernel buffer then use memcpy() otherwise 1034 * copy_from_user(). 1035 */ 1036 dst_offset = dst_paddr & 15; 1037 1038 if (src_tpage) 1039 memcpy(page_address(dst_tpage) + dst_offset, 1040 page_address(src_tpage), size); 1041 else { 1042 if (copy_from_user(page_address(dst_tpage) + dst_offset, 1043 vaddr, size)) { 1044 ret = -EFAULT; 1045 goto e_free; 1046 } 1047 } 1048 1049 paddr = __sme_page_pa(dst_tpage); 1050 dst_paddr = round_down(dst_paddr, 16); 1051 len = round_up(size, 16); 1052 } 1053 1054 ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true); 1055 1056 e_free: 1057 if (src_tpage) 1058 __free_page(src_tpage); 1059 if (dst_tpage) 1060 __free_page(dst_tpage); 1061 return ret; 1062 } 1063 1064 static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) 1065 { 1066 unsigned long vaddr, vaddr_end, next_vaddr; 1067 unsigned long dst_vaddr; 1068 struct page **src_p, **dst_p; 1069 struct kvm_sev_dbg debug; 1070 unsigned long n; 1071 unsigned int size; 1072 int ret; 1073 1074 if (!sev_guest(kvm)) 1075 return -ENOTTY; 1076 1077 if (copy_from_user(&debug, u64_to_user_ptr(argp->data), sizeof(debug))) 1078 return -EFAULT; 1079 1080 if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr) 1081 return -EINVAL; 1082 if (!debug.dst_uaddr) 1083 return -EINVAL; 1084 1085 vaddr = debug.src_uaddr; 1086 size = debug.len; 1087 vaddr_end = vaddr + size; 1088 dst_vaddr = debug.dst_uaddr; 1089 1090 for (; vaddr < vaddr_end; vaddr = next_vaddr) { 1091 int len, s_off, d_off; 1092 1093 /* lock userspace source and destination page */ 1094 src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0); 1095 if (IS_ERR(src_p)) 1096 return PTR_ERR(src_p); 1097 1098 dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1); 1099 if (IS_ERR(dst_p)) { 1100 sev_unpin_memory(kvm, src_p, n); 1101 return PTR_ERR(dst_p); 1102 } 1103 1104 /* 1105 * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify 1106 * the pages; flush the destination too so that future accesses do not 1107 * see stale data. 1108 */ 1109 sev_clflush_pages(src_p, 1); 1110 sev_clflush_pages(dst_p, 1); 1111 1112 /* 1113 * Since user buffer may not be page aligned, calculate the 1114 * offset within the page. 1115 */ 1116 s_off = vaddr & ~PAGE_MASK; 1117 d_off = dst_vaddr & ~PAGE_MASK; 1118 len = min_t(size_t, (PAGE_SIZE - s_off), size); 1119 1120 if (dec) 1121 ret = __sev_dbg_decrypt_user(kvm, 1122 __sme_page_pa(src_p[0]) + s_off, 1123 (void __user *)dst_vaddr, 1124 __sme_page_pa(dst_p[0]) + d_off, 1125 len, &argp->error); 1126 else 1127 ret = __sev_dbg_encrypt_user(kvm, 1128 __sme_page_pa(src_p[0]) + s_off, 1129 (void __user *)vaddr, 1130 __sme_page_pa(dst_p[0]) + d_off, 1131 (void __user *)dst_vaddr, 1132 len, &argp->error); 1133 1134 sev_unpin_memory(kvm, src_p, n); 1135 sev_unpin_memory(kvm, dst_p, n); 1136 1137 if (ret) 1138 goto err; 1139 1140 next_vaddr = vaddr + len; 1141 dst_vaddr = dst_vaddr + len; 1142 size -= len; 1143 } 1144 err: 1145 return ret; 1146 } 1147 1148 static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) 1149 { 1150 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1151 struct sev_data_launch_secret data; 1152 struct kvm_sev_launch_secret params; 1153 struct page **pages; 1154 void *blob, *hdr; 1155 unsigned long n, i; 1156 int ret, offset; 1157 1158 if (!sev_guest(kvm)) 1159 return -ENOTTY; 1160 1161 if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) 1162 return -EFAULT; 1163 1164 pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1); 1165 if (IS_ERR(pages)) 1166 return PTR_ERR(pages); 1167 1168 /* 1169 * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in 1170 * place; the cache may contain the data that was written unencrypted. 1171 */ 1172 sev_clflush_pages(pages, n); 1173 1174 /* 1175 * The secret must be copied into contiguous memory region, lets verify 1176 * that userspace memory pages are contiguous before we issue command. 1177 */ 1178 if (get_num_contig_pages(0, pages, n) != n) { 1179 ret = -EINVAL; 1180 goto e_unpin_memory; 1181 } 1182 1183 memset(&data, 0, sizeof(data)); 1184 1185 offset = params.guest_uaddr & (PAGE_SIZE - 1); 1186 data.guest_address = __sme_page_pa(pages[0]) + offset; 1187 data.guest_len = params.guest_len; 1188 1189 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); 1190 if (IS_ERR(blob)) { 1191 ret = PTR_ERR(blob); 1192 goto e_unpin_memory; 1193 } 1194 1195 data.trans_address = __psp_pa(blob); 1196 data.trans_len = params.trans_len; 1197 1198 hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len); 1199 if (IS_ERR(hdr)) { 1200 ret = PTR_ERR(hdr); 1201 goto e_free_blob; 1202 } 1203 data.hdr_address = __psp_pa(hdr); 1204 data.hdr_len = params.hdr_len; 1205 1206 data.handle = sev->handle; 1207 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, &data, &argp->error); 1208 1209 kfree(hdr); 1210 1211 e_free_blob: 1212 kfree(blob); 1213 e_unpin_memory: 1214 /* content of memory is updated, mark pages dirty */ 1215 for (i = 0; i < n; i++) { 1216 set_page_dirty_lock(pages[i]); 1217 mark_page_accessed(pages[i]); 1218 } 1219 sev_unpin_memory(kvm, pages, n); 1220 return ret; 1221 } 1222 1223 static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp) 1224 { 1225 void __user *report = u64_to_user_ptr(argp->data); 1226 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1227 struct sev_data_attestation_report data; 1228 struct kvm_sev_attestation_report params; 1229 void __user *p; 1230 void *blob = NULL; 1231 int ret; 1232 1233 if (!sev_guest(kvm)) 1234 return -ENOTTY; 1235 1236 if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) 1237 return -EFAULT; 1238 1239 memset(&data, 0, sizeof(data)); 1240 1241 /* User wants to query the blob length */ 1242 if (!params.len) 1243 goto cmd; 1244 1245 p = u64_to_user_ptr(params.uaddr); 1246 if (p) { 1247 if (params.len > SEV_FW_BLOB_MAX_SIZE) 1248 return -EINVAL; 1249 1250 blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); 1251 if (!blob) 1252 return -ENOMEM; 1253 1254 data.address = __psp_pa(blob); 1255 data.len = params.len; 1256 memcpy(data.mnonce, params.mnonce, sizeof(params.mnonce)); 1257 } 1258 cmd: 1259 data.handle = sev->handle; 1260 ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, &data, &argp->error); 1261 /* 1262 * If we query the session length, FW responded with expected data. 1263 */ 1264 if (!params.len) 1265 goto done; 1266 1267 if (ret) 1268 goto e_free_blob; 1269 1270 if (blob) { 1271 if (copy_to_user(p, blob, params.len)) 1272 ret = -EFAULT; 1273 } 1274 1275 done: 1276 params.len = data.len; 1277 if (copy_to_user(report, ¶ms, sizeof(params))) 1278 ret = -EFAULT; 1279 e_free_blob: 1280 kfree(blob); 1281 return ret; 1282 } 1283 1284 /* Userspace wants to query session length. */ 1285 static int 1286 __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp, 1287 struct kvm_sev_send_start *params) 1288 { 1289 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1290 struct sev_data_send_start data; 1291 int ret; 1292 1293 memset(&data, 0, sizeof(data)); 1294 data.handle = sev->handle; 1295 ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error); 1296 1297 params->session_len = data.session_len; 1298 if (copy_to_user(u64_to_user_ptr(argp->data), params, 1299 sizeof(struct kvm_sev_send_start))) 1300 ret = -EFAULT; 1301 1302 return ret; 1303 } 1304 1305 static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp) 1306 { 1307 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1308 struct sev_data_send_start data; 1309 struct kvm_sev_send_start params; 1310 void *amd_certs, *session_data; 1311 void *pdh_cert, *plat_certs; 1312 int ret; 1313 1314 if (!sev_guest(kvm)) 1315 return -ENOTTY; 1316 1317 if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), 1318 sizeof(struct kvm_sev_send_start))) 1319 return -EFAULT; 1320 1321 /* if session_len is zero, userspace wants to query the session length */ 1322 if (!params.session_len) 1323 return __sev_send_start_query_session_length(kvm, argp, 1324 ¶ms); 1325 1326 /* some sanity checks */ 1327 if (!params.pdh_cert_uaddr || !params.pdh_cert_len || 1328 !params.session_uaddr || params.session_len > SEV_FW_BLOB_MAX_SIZE) 1329 return -EINVAL; 1330 1331 /* allocate the memory to hold the session data blob */ 1332 session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT); 1333 if (!session_data) 1334 return -ENOMEM; 1335 1336 /* copy the certificate blobs from userspace */ 1337 pdh_cert = psp_copy_user_blob(params.pdh_cert_uaddr, 1338 params.pdh_cert_len); 1339 if (IS_ERR(pdh_cert)) { 1340 ret = PTR_ERR(pdh_cert); 1341 goto e_free_session; 1342 } 1343 1344 plat_certs = psp_copy_user_blob(params.plat_certs_uaddr, 1345 params.plat_certs_len); 1346 if (IS_ERR(plat_certs)) { 1347 ret = PTR_ERR(plat_certs); 1348 goto e_free_pdh; 1349 } 1350 1351 amd_certs = psp_copy_user_blob(params.amd_certs_uaddr, 1352 params.amd_certs_len); 1353 if (IS_ERR(amd_certs)) { 1354 ret = PTR_ERR(amd_certs); 1355 goto e_free_plat_cert; 1356 } 1357 1358 /* populate the FW SEND_START field with system physical address */ 1359 memset(&data, 0, sizeof(data)); 1360 data.pdh_cert_address = __psp_pa(pdh_cert); 1361 data.pdh_cert_len = params.pdh_cert_len; 1362 data.plat_certs_address = __psp_pa(plat_certs); 1363 data.plat_certs_len = params.plat_certs_len; 1364 data.amd_certs_address = __psp_pa(amd_certs); 1365 data.amd_certs_len = params.amd_certs_len; 1366 data.session_address = __psp_pa(session_data); 1367 data.session_len = params.session_len; 1368 data.handle = sev->handle; 1369 1370 ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error); 1371 1372 if (!ret && copy_to_user(u64_to_user_ptr(params.session_uaddr), 1373 session_data, params.session_len)) { 1374 ret = -EFAULT; 1375 goto e_free_amd_cert; 1376 } 1377 1378 params.policy = data.policy; 1379 params.session_len = data.session_len; 1380 if (copy_to_user(u64_to_user_ptr(argp->data), ¶ms, 1381 sizeof(struct kvm_sev_send_start))) 1382 ret = -EFAULT; 1383 1384 e_free_amd_cert: 1385 kfree(amd_certs); 1386 e_free_plat_cert: 1387 kfree(plat_certs); 1388 e_free_pdh: 1389 kfree(pdh_cert); 1390 e_free_session: 1391 kfree(session_data); 1392 return ret; 1393 } 1394 1395 /* Userspace wants to query either header or trans length. */ 1396 static int 1397 __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp, 1398 struct kvm_sev_send_update_data *params) 1399 { 1400 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1401 struct sev_data_send_update_data data; 1402 int ret; 1403 1404 memset(&data, 0, sizeof(data)); 1405 data.handle = sev->handle; 1406 ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error); 1407 1408 params->hdr_len = data.hdr_len; 1409 params->trans_len = data.trans_len; 1410 1411 if (copy_to_user(u64_to_user_ptr(argp->data), params, 1412 sizeof(struct kvm_sev_send_update_data))) 1413 ret = -EFAULT; 1414 1415 return ret; 1416 } 1417 1418 static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) 1419 { 1420 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1421 struct sev_data_send_update_data data; 1422 struct kvm_sev_send_update_data params; 1423 void *hdr, *trans_data; 1424 struct page **guest_page; 1425 unsigned long n; 1426 int ret, offset; 1427 1428 if (!sev_guest(kvm)) 1429 return -ENOTTY; 1430 1431 if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), 1432 sizeof(struct kvm_sev_send_update_data))) 1433 return -EFAULT; 1434 1435 /* userspace wants to query either header or trans length */ 1436 if (!params.trans_len || !params.hdr_len) 1437 return __sev_send_update_data_query_lengths(kvm, argp, ¶ms); 1438 1439 if (!params.trans_uaddr || !params.guest_uaddr || 1440 !params.guest_len || !params.hdr_uaddr) 1441 return -EINVAL; 1442 1443 /* Check if we are crossing the page boundary */ 1444 offset = params.guest_uaddr & (PAGE_SIZE - 1); 1445 if (params.guest_len > PAGE_SIZE || (params.guest_len + offset) > PAGE_SIZE) 1446 return -EINVAL; 1447 1448 /* Pin guest memory */ 1449 guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK, 1450 PAGE_SIZE, &n, 0); 1451 if (IS_ERR(guest_page)) 1452 return PTR_ERR(guest_page); 1453 1454 /* allocate memory for header and transport buffer */ 1455 ret = -ENOMEM; 1456 hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT); 1457 if (!hdr) 1458 goto e_unpin; 1459 1460 trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT); 1461 if (!trans_data) 1462 goto e_free_hdr; 1463 1464 memset(&data, 0, sizeof(data)); 1465 data.hdr_address = __psp_pa(hdr); 1466 data.hdr_len = params.hdr_len; 1467 data.trans_address = __psp_pa(trans_data); 1468 data.trans_len = params.trans_len; 1469 1470 /* The SEND_UPDATE_DATA command requires C-bit to be always set. */ 1471 data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset; 1472 data.guest_address |= sev_me_mask; 1473 data.guest_len = params.guest_len; 1474 data.handle = sev->handle; 1475 1476 ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error); 1477 1478 if (ret) 1479 goto e_free_trans_data; 1480 1481 /* copy transport buffer to user space */ 1482 if (copy_to_user(u64_to_user_ptr(params.trans_uaddr), 1483 trans_data, params.trans_len)) { 1484 ret = -EFAULT; 1485 goto e_free_trans_data; 1486 } 1487 1488 /* Copy packet header to userspace. */ 1489 if (copy_to_user(u64_to_user_ptr(params.hdr_uaddr), hdr, 1490 params.hdr_len)) 1491 ret = -EFAULT; 1492 1493 e_free_trans_data: 1494 kfree(trans_data); 1495 e_free_hdr: 1496 kfree(hdr); 1497 e_unpin: 1498 sev_unpin_memory(kvm, guest_page, n); 1499 1500 return ret; 1501 } 1502 1503 static int sev_send_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) 1504 { 1505 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1506 struct sev_data_send_finish data; 1507 1508 if (!sev_guest(kvm)) 1509 return -ENOTTY; 1510 1511 data.handle = sev->handle; 1512 return sev_issue_cmd(kvm, SEV_CMD_SEND_FINISH, &data, &argp->error); 1513 } 1514 1515 static int sev_send_cancel(struct kvm *kvm, struct kvm_sev_cmd *argp) 1516 { 1517 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1518 struct sev_data_send_cancel data; 1519 1520 if (!sev_guest(kvm)) 1521 return -ENOTTY; 1522 1523 data.handle = sev->handle; 1524 return sev_issue_cmd(kvm, SEV_CMD_SEND_CANCEL, &data, &argp->error); 1525 } 1526 1527 static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp) 1528 { 1529 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1530 struct sev_data_receive_start start; 1531 struct kvm_sev_receive_start params; 1532 int *error = &argp->error; 1533 void *session_data; 1534 void *pdh_data; 1535 int ret; 1536 1537 if (!sev_guest(kvm)) 1538 return -ENOTTY; 1539 1540 /* Get parameter from the userspace */ 1541 if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), 1542 sizeof(struct kvm_sev_receive_start))) 1543 return -EFAULT; 1544 1545 /* some sanity checks */ 1546 if (!params.pdh_uaddr || !params.pdh_len || 1547 !params.session_uaddr || !params.session_len) 1548 return -EINVAL; 1549 1550 pdh_data = psp_copy_user_blob(params.pdh_uaddr, params.pdh_len); 1551 if (IS_ERR(pdh_data)) 1552 return PTR_ERR(pdh_data); 1553 1554 session_data = psp_copy_user_blob(params.session_uaddr, 1555 params.session_len); 1556 if (IS_ERR(session_data)) { 1557 ret = PTR_ERR(session_data); 1558 goto e_free_pdh; 1559 } 1560 1561 memset(&start, 0, sizeof(start)); 1562 start.handle = params.handle; 1563 start.policy = params.policy; 1564 start.pdh_cert_address = __psp_pa(pdh_data); 1565 start.pdh_cert_len = params.pdh_len; 1566 start.session_address = __psp_pa(session_data); 1567 start.session_len = params.session_len; 1568 1569 /* create memory encryption context */ 1570 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_RECEIVE_START, &start, 1571 error); 1572 if (ret) 1573 goto e_free_session; 1574 1575 /* Bind ASID to this guest */ 1576 ret = sev_bind_asid(kvm, start.handle, error); 1577 if (ret) { 1578 sev_decommission(start.handle); 1579 goto e_free_session; 1580 } 1581 1582 params.handle = start.handle; 1583 if (copy_to_user(u64_to_user_ptr(argp->data), 1584 ¶ms, sizeof(struct kvm_sev_receive_start))) { 1585 ret = -EFAULT; 1586 sev_unbind_asid(kvm, start.handle); 1587 goto e_free_session; 1588 } 1589 1590 sev->handle = start.handle; 1591 sev->fd = argp->sev_fd; 1592 1593 e_free_session: 1594 kfree(session_data); 1595 e_free_pdh: 1596 kfree(pdh_data); 1597 1598 return ret; 1599 } 1600 1601 static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) 1602 { 1603 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1604 struct kvm_sev_receive_update_data params; 1605 struct sev_data_receive_update_data data; 1606 void *hdr = NULL, *trans = NULL; 1607 struct page **guest_page; 1608 unsigned long n; 1609 int ret, offset; 1610 1611 if (!sev_guest(kvm)) 1612 return -EINVAL; 1613 1614 if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), 1615 sizeof(struct kvm_sev_receive_update_data))) 1616 return -EFAULT; 1617 1618 if (!params.hdr_uaddr || !params.hdr_len || 1619 !params.guest_uaddr || !params.guest_len || 1620 !params.trans_uaddr || !params.trans_len) 1621 return -EINVAL; 1622 1623 /* Check if we are crossing the page boundary */ 1624 offset = params.guest_uaddr & (PAGE_SIZE - 1); 1625 if (params.guest_len > PAGE_SIZE || (params.guest_len + offset) > PAGE_SIZE) 1626 return -EINVAL; 1627 1628 hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len); 1629 if (IS_ERR(hdr)) 1630 return PTR_ERR(hdr); 1631 1632 trans = psp_copy_user_blob(params.trans_uaddr, params.trans_len); 1633 if (IS_ERR(trans)) { 1634 ret = PTR_ERR(trans); 1635 goto e_free_hdr; 1636 } 1637 1638 memset(&data, 0, sizeof(data)); 1639 data.hdr_address = __psp_pa(hdr); 1640 data.hdr_len = params.hdr_len; 1641 data.trans_address = __psp_pa(trans); 1642 data.trans_len = params.trans_len; 1643 1644 /* Pin guest memory */ 1645 guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK, 1646 PAGE_SIZE, &n, 1); 1647 if (IS_ERR(guest_page)) { 1648 ret = PTR_ERR(guest_page); 1649 goto e_free_trans; 1650 } 1651 1652 /* 1653 * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP 1654 * encrypts the written data with the guest's key, and the cache may 1655 * contain dirty, unencrypted data. 1656 */ 1657 sev_clflush_pages(guest_page, n); 1658 1659 /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */ 1660 data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset; 1661 data.guest_address |= sev_me_mask; 1662 data.guest_len = params.guest_len; 1663 data.handle = sev->handle; 1664 1665 ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data, 1666 &argp->error); 1667 1668 sev_unpin_memory(kvm, guest_page, n); 1669 1670 e_free_trans: 1671 kfree(trans); 1672 e_free_hdr: 1673 kfree(hdr); 1674 1675 return ret; 1676 } 1677 1678 static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) 1679 { 1680 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1681 struct sev_data_receive_finish data; 1682 1683 if (!sev_guest(kvm)) 1684 return -ENOTTY; 1685 1686 data.handle = sev->handle; 1687 return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error); 1688 } 1689 1690 static bool is_cmd_allowed_from_mirror(u32 cmd_id) 1691 { 1692 /* 1693 * Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES 1694 * active mirror VMs. Also allow the debugging and status commands. 1695 */ 1696 if (cmd_id == KVM_SEV_LAUNCH_UPDATE_VMSA || 1697 cmd_id == KVM_SEV_GUEST_STATUS || cmd_id == KVM_SEV_DBG_DECRYPT || 1698 cmd_id == KVM_SEV_DBG_ENCRYPT) 1699 return true; 1700 1701 return false; 1702 } 1703 1704 static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) 1705 { 1706 struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info; 1707 struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info; 1708 int r = -EBUSY; 1709 1710 if (dst_kvm == src_kvm) 1711 return -EINVAL; 1712 1713 /* 1714 * Bail if these VMs are already involved in a migration to avoid 1715 * deadlock between two VMs trying to migrate to/from each other. 1716 */ 1717 if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1)) 1718 return -EBUSY; 1719 1720 if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1)) 1721 goto release_dst; 1722 1723 r = -EINTR; 1724 if (mutex_lock_killable(&dst_kvm->lock)) 1725 goto release_src; 1726 if (mutex_lock_killable_nested(&src_kvm->lock, SINGLE_DEPTH_NESTING)) 1727 goto unlock_dst; 1728 return 0; 1729 1730 unlock_dst: 1731 mutex_unlock(&dst_kvm->lock); 1732 release_src: 1733 atomic_set_release(&src_sev->migration_in_progress, 0); 1734 release_dst: 1735 atomic_set_release(&dst_sev->migration_in_progress, 0); 1736 return r; 1737 } 1738 1739 static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) 1740 { 1741 struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info; 1742 struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info; 1743 1744 mutex_unlock(&dst_kvm->lock); 1745 mutex_unlock(&src_kvm->lock); 1746 atomic_set_release(&dst_sev->migration_in_progress, 0); 1747 atomic_set_release(&src_sev->migration_in_progress, 0); 1748 } 1749 1750 /* vCPU mutex subclasses. */ 1751 enum sev_migration_role { 1752 SEV_MIGRATION_SOURCE = 0, 1753 SEV_MIGRATION_TARGET, 1754 SEV_NR_MIGRATION_ROLES, 1755 }; 1756 1757 static int sev_lock_vcpus_for_migration(struct kvm *kvm, 1758 enum sev_migration_role role) 1759 { 1760 struct kvm_vcpu *vcpu; 1761 unsigned long i, j; 1762 1763 kvm_for_each_vcpu(i, vcpu, kvm) { 1764 if (mutex_lock_killable_nested(&vcpu->mutex, role)) 1765 goto out_unlock; 1766 1767 #ifdef CONFIG_PROVE_LOCKING 1768 if (!i) 1769 /* 1770 * Reset the role to one that avoids colliding with 1771 * the role used for the first vcpu mutex. 1772 */ 1773 role = SEV_NR_MIGRATION_ROLES; 1774 else 1775 mutex_release(&vcpu->mutex.dep_map, _THIS_IP_); 1776 #endif 1777 } 1778 1779 return 0; 1780 1781 out_unlock: 1782 1783 kvm_for_each_vcpu(j, vcpu, kvm) { 1784 if (i == j) 1785 break; 1786 1787 #ifdef CONFIG_PROVE_LOCKING 1788 if (j) 1789 mutex_acquire(&vcpu->mutex.dep_map, role, 0, _THIS_IP_); 1790 #endif 1791 1792 mutex_unlock(&vcpu->mutex); 1793 } 1794 return -EINTR; 1795 } 1796 1797 static void sev_unlock_vcpus_for_migration(struct kvm *kvm) 1798 { 1799 struct kvm_vcpu *vcpu; 1800 unsigned long i; 1801 bool first = true; 1802 1803 kvm_for_each_vcpu(i, vcpu, kvm) { 1804 if (first) 1805 first = false; 1806 else 1807 mutex_acquire(&vcpu->mutex.dep_map, 1808 SEV_NR_MIGRATION_ROLES, 0, _THIS_IP_); 1809 1810 mutex_unlock(&vcpu->mutex); 1811 } 1812 } 1813 1814 static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) 1815 { 1816 struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info; 1817 struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info; 1818 struct kvm_vcpu *dst_vcpu, *src_vcpu; 1819 struct vcpu_svm *dst_svm, *src_svm; 1820 struct kvm_sev_info *mirror; 1821 unsigned long i; 1822 1823 dst->active = true; 1824 dst->asid = src->asid; 1825 dst->handle = src->handle; 1826 dst->pages_locked = src->pages_locked; 1827 dst->enc_context_owner = src->enc_context_owner; 1828 dst->es_active = src->es_active; 1829 dst->vmsa_features = src->vmsa_features; 1830 1831 src->asid = 0; 1832 src->active = false; 1833 src->handle = 0; 1834 src->pages_locked = 0; 1835 src->enc_context_owner = NULL; 1836 src->es_active = false; 1837 1838 list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list); 1839 1840 /* 1841 * If this VM has mirrors, "transfer" each mirror's refcount of the 1842 * source to the destination (this KVM). The caller holds a reference 1843 * to the source, so there's no danger of use-after-free. 1844 */ 1845 list_cut_before(&dst->mirror_vms, &src->mirror_vms, &src->mirror_vms); 1846 list_for_each_entry(mirror, &dst->mirror_vms, mirror_entry) { 1847 kvm_get_kvm(dst_kvm); 1848 kvm_put_kvm(src_kvm); 1849 mirror->enc_context_owner = dst_kvm; 1850 } 1851 1852 /* 1853 * If this VM is a mirror, remove the old mirror from the owners list 1854 * and add the new mirror to the list. 1855 */ 1856 if (is_mirroring_enc_context(dst_kvm)) { 1857 struct kvm_sev_info *owner_sev_info = 1858 &to_kvm_svm(dst->enc_context_owner)->sev_info; 1859 1860 list_del(&src->mirror_entry); 1861 list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms); 1862 } 1863 1864 kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) { 1865 dst_svm = to_svm(dst_vcpu); 1866 1867 sev_init_vmcb(dst_svm); 1868 1869 if (!dst->es_active) 1870 continue; 1871 1872 /* 1873 * Note, the source is not required to have the same number of 1874 * vCPUs as the destination when migrating a vanilla SEV VM. 1875 */ 1876 src_vcpu = kvm_get_vcpu(src_kvm, i); 1877 src_svm = to_svm(src_vcpu); 1878 1879 /* 1880 * Transfer VMSA and GHCB state to the destination. Nullify and 1881 * clear source fields as appropriate, the state now belongs to 1882 * the destination. 1883 */ 1884 memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es)); 1885 dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa; 1886 dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa; 1887 dst_vcpu->arch.guest_state_protected = true; 1888 1889 memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es)); 1890 src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE; 1891 src_svm->vmcb->control.vmsa_pa = INVALID_PAGE; 1892 src_vcpu->arch.guest_state_protected = false; 1893 } 1894 } 1895 1896 static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src) 1897 { 1898 struct kvm_vcpu *src_vcpu; 1899 unsigned long i; 1900 1901 if (!sev_es_guest(src)) 1902 return 0; 1903 1904 if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus)) 1905 return -EINVAL; 1906 1907 kvm_for_each_vcpu(i, src_vcpu, src) { 1908 if (!src_vcpu->arch.guest_state_protected) 1909 return -EINVAL; 1910 } 1911 1912 return 0; 1913 } 1914 1915 int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) 1916 { 1917 struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info; 1918 struct kvm_sev_info *src_sev, *cg_cleanup_sev; 1919 struct fd f = fdget(source_fd); 1920 struct kvm *source_kvm; 1921 bool charged = false; 1922 int ret; 1923 1924 if (!f.file) 1925 return -EBADF; 1926 1927 if (!file_is_kvm(f.file)) { 1928 ret = -EBADF; 1929 goto out_fput; 1930 } 1931 1932 source_kvm = f.file->private_data; 1933 ret = sev_lock_two_vms(kvm, source_kvm); 1934 if (ret) 1935 goto out_fput; 1936 1937 if (kvm->arch.vm_type != source_kvm->arch.vm_type || 1938 sev_guest(kvm) || !sev_guest(source_kvm)) { 1939 ret = -EINVAL; 1940 goto out_unlock; 1941 } 1942 1943 src_sev = &to_kvm_svm(source_kvm)->sev_info; 1944 1945 dst_sev->misc_cg = get_current_misc_cg(); 1946 cg_cleanup_sev = dst_sev; 1947 if (dst_sev->misc_cg != src_sev->misc_cg) { 1948 ret = sev_misc_cg_try_charge(dst_sev); 1949 if (ret) 1950 goto out_dst_cgroup; 1951 charged = true; 1952 } 1953 1954 ret = sev_lock_vcpus_for_migration(kvm, SEV_MIGRATION_SOURCE); 1955 if (ret) 1956 goto out_dst_cgroup; 1957 ret = sev_lock_vcpus_for_migration(source_kvm, SEV_MIGRATION_TARGET); 1958 if (ret) 1959 goto out_dst_vcpu; 1960 1961 ret = sev_check_source_vcpus(kvm, source_kvm); 1962 if (ret) 1963 goto out_source_vcpu; 1964 1965 sev_migrate_from(kvm, source_kvm); 1966 kvm_vm_dead(source_kvm); 1967 cg_cleanup_sev = src_sev; 1968 ret = 0; 1969 1970 out_source_vcpu: 1971 sev_unlock_vcpus_for_migration(source_kvm); 1972 out_dst_vcpu: 1973 sev_unlock_vcpus_for_migration(kvm); 1974 out_dst_cgroup: 1975 /* Operates on the source on success, on the destination on failure. */ 1976 if (charged) 1977 sev_misc_cg_uncharge(cg_cleanup_sev); 1978 put_misc_cg(cg_cleanup_sev->misc_cg); 1979 cg_cleanup_sev->misc_cg = NULL; 1980 out_unlock: 1981 sev_unlock_two_vms(kvm, source_kvm); 1982 out_fput: 1983 fdput(f); 1984 return ret; 1985 } 1986 1987 int sev_dev_get_attr(u32 group, u64 attr, u64 *val) 1988 { 1989 if (group != KVM_X86_GRP_SEV) 1990 return -ENXIO; 1991 1992 switch (attr) { 1993 case KVM_X86_SEV_VMSA_FEATURES: 1994 *val = sev_supported_vmsa_features; 1995 return 0; 1996 1997 default: 1998 return -ENXIO; 1999 } 2000 } 2001 2002 int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp) 2003 { 2004 struct kvm_sev_cmd sev_cmd; 2005 int r; 2006 2007 if (!sev_enabled) 2008 return -ENOTTY; 2009 2010 if (!argp) 2011 return 0; 2012 2013 if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd))) 2014 return -EFAULT; 2015 2016 mutex_lock(&kvm->lock); 2017 2018 /* Only the enc_context_owner handles some memory enc operations. */ 2019 if (is_mirroring_enc_context(kvm) && 2020 !is_cmd_allowed_from_mirror(sev_cmd.id)) { 2021 r = -EINVAL; 2022 goto out; 2023 } 2024 2025 switch (sev_cmd.id) { 2026 case KVM_SEV_ES_INIT: 2027 if (!sev_es_enabled) { 2028 r = -ENOTTY; 2029 goto out; 2030 } 2031 fallthrough; 2032 case KVM_SEV_INIT: 2033 r = sev_guest_init(kvm, &sev_cmd); 2034 break; 2035 case KVM_SEV_INIT2: 2036 r = sev_guest_init2(kvm, &sev_cmd); 2037 break; 2038 case KVM_SEV_LAUNCH_START: 2039 r = sev_launch_start(kvm, &sev_cmd); 2040 break; 2041 case KVM_SEV_LAUNCH_UPDATE_DATA: 2042 r = sev_launch_update_data(kvm, &sev_cmd); 2043 break; 2044 case KVM_SEV_LAUNCH_UPDATE_VMSA: 2045 r = sev_launch_update_vmsa(kvm, &sev_cmd); 2046 break; 2047 case KVM_SEV_LAUNCH_MEASURE: 2048 r = sev_launch_measure(kvm, &sev_cmd); 2049 break; 2050 case KVM_SEV_LAUNCH_FINISH: 2051 r = sev_launch_finish(kvm, &sev_cmd); 2052 break; 2053 case KVM_SEV_GUEST_STATUS: 2054 r = sev_guest_status(kvm, &sev_cmd); 2055 break; 2056 case KVM_SEV_DBG_DECRYPT: 2057 r = sev_dbg_crypt(kvm, &sev_cmd, true); 2058 break; 2059 case KVM_SEV_DBG_ENCRYPT: 2060 r = sev_dbg_crypt(kvm, &sev_cmd, false); 2061 break; 2062 case KVM_SEV_LAUNCH_SECRET: 2063 r = sev_launch_secret(kvm, &sev_cmd); 2064 break; 2065 case KVM_SEV_GET_ATTESTATION_REPORT: 2066 r = sev_get_attestation_report(kvm, &sev_cmd); 2067 break; 2068 case KVM_SEV_SEND_START: 2069 r = sev_send_start(kvm, &sev_cmd); 2070 break; 2071 case KVM_SEV_SEND_UPDATE_DATA: 2072 r = sev_send_update_data(kvm, &sev_cmd); 2073 break; 2074 case KVM_SEV_SEND_FINISH: 2075 r = sev_send_finish(kvm, &sev_cmd); 2076 break; 2077 case KVM_SEV_SEND_CANCEL: 2078 r = sev_send_cancel(kvm, &sev_cmd); 2079 break; 2080 case KVM_SEV_RECEIVE_START: 2081 r = sev_receive_start(kvm, &sev_cmd); 2082 break; 2083 case KVM_SEV_RECEIVE_UPDATE_DATA: 2084 r = sev_receive_update_data(kvm, &sev_cmd); 2085 break; 2086 case KVM_SEV_RECEIVE_FINISH: 2087 r = sev_receive_finish(kvm, &sev_cmd); 2088 break; 2089 default: 2090 r = -EINVAL; 2091 goto out; 2092 } 2093 2094 if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd))) 2095 r = -EFAULT; 2096 2097 out: 2098 mutex_unlock(&kvm->lock); 2099 return r; 2100 } 2101 2102 int sev_mem_enc_register_region(struct kvm *kvm, 2103 struct kvm_enc_region *range) 2104 { 2105 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 2106 struct enc_region *region; 2107 int ret = 0; 2108 2109 if (!sev_guest(kvm)) 2110 return -ENOTTY; 2111 2112 /* If kvm is mirroring encryption context it isn't responsible for it */ 2113 if (is_mirroring_enc_context(kvm)) 2114 return -EINVAL; 2115 2116 if (range->addr > ULONG_MAX || range->size > ULONG_MAX) 2117 return -EINVAL; 2118 2119 region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT); 2120 if (!region) 2121 return -ENOMEM; 2122 2123 mutex_lock(&kvm->lock); 2124 region->pages = sev_pin_memory(kvm, range->addr, range->size, ®ion->npages, 1); 2125 if (IS_ERR(region->pages)) { 2126 ret = PTR_ERR(region->pages); 2127 mutex_unlock(&kvm->lock); 2128 goto e_free; 2129 } 2130 2131 /* 2132 * The guest may change the memory encryption attribute from C=0 -> C=1 2133 * or vice versa for this memory range. Lets make sure caches are 2134 * flushed to ensure that guest data gets written into memory with 2135 * correct C-bit. Note, this must be done before dropping kvm->lock, 2136 * as region and its array of pages can be freed by a different task 2137 * once kvm->lock is released. 2138 */ 2139 sev_clflush_pages(region->pages, region->npages); 2140 2141 region->uaddr = range->addr; 2142 region->size = range->size; 2143 2144 list_add_tail(®ion->list, &sev->regions_list); 2145 mutex_unlock(&kvm->lock); 2146 2147 return ret; 2148 2149 e_free: 2150 kfree(region); 2151 return ret; 2152 } 2153 2154 static struct enc_region * 2155 find_enc_region(struct kvm *kvm, struct kvm_enc_region *range) 2156 { 2157 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 2158 struct list_head *head = &sev->regions_list; 2159 struct enc_region *i; 2160 2161 list_for_each_entry(i, head, list) { 2162 if (i->uaddr == range->addr && 2163 i->size == range->size) 2164 return i; 2165 } 2166 2167 return NULL; 2168 } 2169 2170 static void __unregister_enc_region_locked(struct kvm *kvm, 2171 struct enc_region *region) 2172 { 2173 sev_unpin_memory(kvm, region->pages, region->npages); 2174 list_del(®ion->list); 2175 kfree(region); 2176 } 2177 2178 int sev_mem_enc_unregister_region(struct kvm *kvm, 2179 struct kvm_enc_region *range) 2180 { 2181 struct enc_region *region; 2182 int ret; 2183 2184 /* If kvm is mirroring encryption context it isn't responsible for it */ 2185 if (is_mirroring_enc_context(kvm)) 2186 return -EINVAL; 2187 2188 mutex_lock(&kvm->lock); 2189 2190 if (!sev_guest(kvm)) { 2191 ret = -ENOTTY; 2192 goto failed; 2193 } 2194 2195 region = find_enc_region(kvm, range); 2196 if (!region) { 2197 ret = -EINVAL; 2198 goto failed; 2199 } 2200 2201 /* 2202 * Ensure that all guest tagged cache entries are flushed before 2203 * releasing the pages back to the system for use. CLFLUSH will 2204 * not do this, so issue a WBINVD. 2205 */ 2206 wbinvd_on_all_cpus(); 2207 2208 __unregister_enc_region_locked(kvm, region); 2209 2210 mutex_unlock(&kvm->lock); 2211 return 0; 2212 2213 failed: 2214 mutex_unlock(&kvm->lock); 2215 return ret; 2216 } 2217 2218 int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd) 2219 { 2220 struct fd f = fdget(source_fd); 2221 struct kvm *source_kvm; 2222 struct kvm_sev_info *source_sev, *mirror_sev; 2223 int ret; 2224 2225 if (!f.file) 2226 return -EBADF; 2227 2228 if (!file_is_kvm(f.file)) { 2229 ret = -EBADF; 2230 goto e_source_fput; 2231 } 2232 2233 source_kvm = f.file->private_data; 2234 ret = sev_lock_two_vms(kvm, source_kvm); 2235 if (ret) 2236 goto e_source_fput; 2237 2238 /* 2239 * Mirrors of mirrors should work, but let's not get silly. Also 2240 * disallow out-of-band SEV/SEV-ES init if the target is already an 2241 * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being 2242 * created after SEV/SEV-ES initialization, e.g. to init intercepts. 2243 */ 2244 if (sev_guest(kvm) || !sev_guest(source_kvm) || 2245 is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) { 2246 ret = -EINVAL; 2247 goto e_unlock; 2248 } 2249 2250 /* 2251 * The mirror kvm holds an enc_context_owner ref so its asid can't 2252 * disappear until we're done with it 2253 */ 2254 source_sev = &to_kvm_svm(source_kvm)->sev_info; 2255 kvm_get_kvm(source_kvm); 2256 mirror_sev = &to_kvm_svm(kvm)->sev_info; 2257 list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms); 2258 2259 /* Set enc_context_owner and copy its encryption context over */ 2260 mirror_sev->enc_context_owner = source_kvm; 2261 mirror_sev->active = true; 2262 mirror_sev->asid = source_sev->asid; 2263 mirror_sev->fd = source_sev->fd; 2264 mirror_sev->es_active = source_sev->es_active; 2265 mirror_sev->need_init = false; 2266 mirror_sev->handle = source_sev->handle; 2267 INIT_LIST_HEAD(&mirror_sev->regions_list); 2268 INIT_LIST_HEAD(&mirror_sev->mirror_vms); 2269 ret = 0; 2270 2271 /* 2272 * Do not copy ap_jump_table. Since the mirror does not share the same 2273 * KVM contexts as the original, and they may have different 2274 * memory-views. 2275 */ 2276 2277 e_unlock: 2278 sev_unlock_two_vms(kvm, source_kvm); 2279 e_source_fput: 2280 fdput(f); 2281 return ret; 2282 } 2283 2284 void sev_vm_destroy(struct kvm *kvm) 2285 { 2286 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 2287 struct list_head *head = &sev->regions_list; 2288 struct list_head *pos, *q; 2289 2290 if (!sev_guest(kvm)) 2291 return; 2292 2293 WARN_ON(!list_empty(&sev->mirror_vms)); 2294 2295 /* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */ 2296 if (is_mirroring_enc_context(kvm)) { 2297 struct kvm *owner_kvm = sev->enc_context_owner; 2298 2299 mutex_lock(&owner_kvm->lock); 2300 list_del(&sev->mirror_entry); 2301 mutex_unlock(&owner_kvm->lock); 2302 kvm_put_kvm(owner_kvm); 2303 return; 2304 } 2305 2306 /* 2307 * Ensure that all guest tagged cache entries are flushed before 2308 * releasing the pages back to the system for use. CLFLUSH will 2309 * not do this, so issue a WBINVD. 2310 */ 2311 wbinvd_on_all_cpus(); 2312 2313 /* 2314 * if userspace was terminated before unregistering the memory regions 2315 * then lets unpin all the registered memory. 2316 */ 2317 if (!list_empty(head)) { 2318 list_for_each_safe(pos, q, head) { 2319 __unregister_enc_region_locked(kvm, 2320 list_entry(pos, struct enc_region, list)); 2321 cond_resched(); 2322 } 2323 } 2324 2325 sev_unbind_asid(kvm, sev->handle); 2326 sev_asid_free(sev); 2327 } 2328 2329 void __init sev_set_cpu_caps(void) 2330 { 2331 if (sev_enabled) { 2332 kvm_cpu_cap_set(X86_FEATURE_SEV); 2333 kvm_caps.supported_vm_types |= BIT(KVM_X86_SEV_VM); 2334 } 2335 if (sev_es_enabled) { 2336 kvm_cpu_cap_set(X86_FEATURE_SEV_ES); 2337 kvm_caps.supported_vm_types |= BIT(KVM_X86_SEV_ES_VM); 2338 } 2339 } 2340 2341 void __init sev_hardware_setup(void) 2342 { 2343 unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count; 2344 bool sev_es_supported = false; 2345 bool sev_supported = false; 2346 2347 if (!sev_enabled || !npt_enabled || !nrips) 2348 goto out; 2349 2350 /* 2351 * SEV must obviously be supported in hardware. Sanity check that the 2352 * CPU supports decode assists, which is mandatory for SEV guests to 2353 * support instruction emulation. Ditto for flushing by ASID, as SEV 2354 * guests are bound to a single ASID, i.e. KVM can't rotate to a new 2355 * ASID to effect a TLB flush. 2356 */ 2357 if (!boot_cpu_has(X86_FEATURE_SEV) || 2358 WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) || 2359 WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_FLUSHBYASID))) 2360 goto out; 2361 2362 /* Retrieve SEV CPUID information */ 2363 cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); 2364 2365 /* Set encryption bit location for SEV-ES guests */ 2366 sev_enc_bit = ebx & 0x3f; 2367 2368 /* Maximum number of encrypted guests supported simultaneously */ 2369 max_sev_asid = ecx; 2370 if (!max_sev_asid) 2371 goto out; 2372 2373 /* Minimum ASID value that should be used for SEV guest */ 2374 min_sev_asid = edx; 2375 sev_me_mask = 1UL << (ebx & 0x3f); 2376 2377 /* 2378 * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap, 2379 * even though it's never used, so that the bitmap is indexed by the 2380 * actual ASID. 2381 */ 2382 nr_asids = max_sev_asid + 1; 2383 sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); 2384 if (!sev_asid_bitmap) 2385 goto out; 2386 2387 sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); 2388 if (!sev_reclaim_asid_bitmap) { 2389 bitmap_free(sev_asid_bitmap); 2390 sev_asid_bitmap = NULL; 2391 goto out; 2392 } 2393 2394 if (min_sev_asid <= max_sev_asid) { 2395 sev_asid_count = max_sev_asid - min_sev_asid + 1; 2396 WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); 2397 } 2398 sev_supported = true; 2399 2400 /* SEV-ES support requested? */ 2401 if (!sev_es_enabled) 2402 goto out; 2403 2404 /* 2405 * SEV-ES requires MMIO caching as KVM doesn't have access to the guest 2406 * instruction stream, i.e. can't emulate in response to a #NPF and 2407 * instead relies on #NPF(RSVD) being reflected into the guest as #VC 2408 * (the guest can then do a #VMGEXIT to request MMIO emulation). 2409 */ 2410 if (!enable_mmio_caching) 2411 goto out; 2412 2413 /* Does the CPU support SEV-ES? */ 2414 if (!boot_cpu_has(X86_FEATURE_SEV_ES)) 2415 goto out; 2416 2417 if (!lbrv) { 2418 WARN_ONCE(!boot_cpu_has(X86_FEATURE_LBRV), 2419 "LBRV must be present for SEV-ES support"); 2420 goto out; 2421 } 2422 2423 /* Has the system been allocated ASIDs for SEV-ES? */ 2424 if (min_sev_asid == 1) 2425 goto out; 2426 2427 sev_es_asid_count = min_sev_asid - 1; 2428 WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count)); 2429 sev_es_supported = true; 2430 2431 out: 2432 if (boot_cpu_has(X86_FEATURE_SEV)) 2433 pr_info("SEV %s (ASIDs %u - %u)\n", 2434 sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" : 2435 "unusable" : 2436 "disabled", 2437 min_sev_asid, max_sev_asid); 2438 if (boot_cpu_has(X86_FEATURE_SEV_ES)) 2439 pr_info("SEV-ES %s (ASIDs %u - %u)\n", 2440 sev_es_supported ? "enabled" : "disabled", 2441 min_sev_asid > 1 ? 1 : 0, min_sev_asid - 1); 2442 2443 sev_enabled = sev_supported; 2444 sev_es_enabled = sev_es_supported; 2445 if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) || 2446 !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP)) 2447 sev_es_debug_swap_enabled = false; 2448 2449 sev_supported_vmsa_features = 0; 2450 if (sev_es_debug_swap_enabled) 2451 sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP; 2452 } 2453 2454 void sev_hardware_unsetup(void) 2455 { 2456 if (!sev_enabled) 2457 return; 2458 2459 /* No need to take sev_bitmap_lock, all VMs have been destroyed. */ 2460 sev_flush_asids(1, max_sev_asid); 2461 2462 bitmap_free(sev_asid_bitmap); 2463 bitmap_free(sev_reclaim_asid_bitmap); 2464 2465 misc_cg_set_capacity(MISC_CG_RES_SEV, 0); 2466 misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0); 2467 } 2468 2469 int sev_cpu_init(struct svm_cpu_data *sd) 2470 { 2471 if (!sev_enabled) 2472 return 0; 2473 2474 sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL); 2475 if (!sd->sev_vmcbs) 2476 return -ENOMEM; 2477 2478 return 0; 2479 } 2480 2481 /* 2482 * Pages used by hardware to hold guest encrypted state must be flushed before 2483 * returning them to the system. 2484 */ 2485 static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) 2486 { 2487 unsigned int asid = sev_get_asid(vcpu->kvm); 2488 2489 /* 2490 * Note! The address must be a kernel address, as regular page walk 2491 * checks are performed by VM_PAGE_FLUSH, i.e. operating on a user 2492 * address is non-deterministic and unsafe. This function deliberately 2493 * takes a pointer to deter passing in a user address. 2494 */ 2495 unsigned long addr = (unsigned long)va; 2496 2497 /* 2498 * If CPU enforced cache coherency for encrypted mappings of the 2499 * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache 2500 * flush is still needed in order to work properly with DMA devices. 2501 */ 2502 if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) { 2503 clflush_cache_range(va, PAGE_SIZE); 2504 return; 2505 } 2506 2507 /* 2508 * VM Page Flush takes a host virtual address and a guest ASID. Fall 2509 * back to WBINVD if this faults so as not to make any problems worse 2510 * by leaving stale encrypted data in the cache. 2511 */ 2512 if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid))) 2513 goto do_wbinvd; 2514 2515 return; 2516 2517 do_wbinvd: 2518 wbinvd_on_all_cpus(); 2519 } 2520 2521 void sev_guest_memory_reclaimed(struct kvm *kvm) 2522 { 2523 if (!sev_guest(kvm)) 2524 return; 2525 2526 wbinvd_on_all_cpus(); 2527 } 2528 2529 void sev_free_vcpu(struct kvm_vcpu *vcpu) 2530 { 2531 struct vcpu_svm *svm; 2532 2533 if (!sev_es_guest(vcpu->kvm)) 2534 return; 2535 2536 svm = to_svm(vcpu); 2537 2538 if (vcpu->arch.guest_state_protected) 2539 sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa); 2540 2541 __free_page(virt_to_page(svm->sev_es.vmsa)); 2542 2543 if (svm->sev_es.ghcb_sa_free) 2544 kvfree(svm->sev_es.ghcb_sa); 2545 } 2546 2547 static void dump_ghcb(struct vcpu_svm *svm) 2548 { 2549 struct ghcb *ghcb = svm->sev_es.ghcb; 2550 unsigned int nbits; 2551 2552 /* Re-use the dump_invalid_vmcb module parameter */ 2553 if (!dump_invalid_vmcb) { 2554 pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n"); 2555 return; 2556 } 2557 2558 nbits = sizeof(ghcb->save.valid_bitmap) * 8; 2559 2560 pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa); 2561 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code", 2562 ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb)); 2563 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1", 2564 ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb)); 2565 pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2", 2566 ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb)); 2567 pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch", 2568 ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb)); 2569 pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap); 2570 } 2571 2572 static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) 2573 { 2574 struct kvm_vcpu *vcpu = &svm->vcpu; 2575 struct ghcb *ghcb = svm->sev_es.ghcb; 2576 2577 /* 2578 * The GHCB protocol so far allows for the following data 2579 * to be returned: 2580 * GPRs RAX, RBX, RCX, RDX 2581 * 2582 * Copy their values, even if they may not have been written during the 2583 * VM-Exit. It's the guest's responsibility to not consume random data. 2584 */ 2585 ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]); 2586 ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]); 2587 ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]); 2588 ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]); 2589 } 2590 2591 static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) 2592 { 2593 struct vmcb_control_area *control = &svm->vmcb->control; 2594 struct kvm_vcpu *vcpu = &svm->vcpu; 2595 struct ghcb *ghcb = svm->sev_es.ghcb; 2596 u64 exit_code; 2597 2598 /* 2599 * The GHCB protocol so far allows for the following data 2600 * to be supplied: 2601 * GPRs RAX, RBX, RCX, RDX 2602 * XCR0 2603 * CPL 2604 * 2605 * VMMCALL allows the guest to provide extra registers. KVM also 2606 * expects RSI for hypercalls, so include that, too. 2607 * 2608 * Copy their values to the appropriate location if supplied. 2609 */ 2610 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); 2611 2612 BUILD_BUG_ON(sizeof(svm->sev_es.valid_bitmap) != sizeof(ghcb->save.valid_bitmap)); 2613 memcpy(&svm->sev_es.valid_bitmap, &ghcb->save.valid_bitmap, sizeof(ghcb->save.valid_bitmap)); 2614 2615 vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm, ghcb); 2616 vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm, ghcb); 2617 vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm, ghcb); 2618 vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm, ghcb); 2619 vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm, ghcb); 2620 2621 svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm, ghcb); 2622 2623 if (kvm_ghcb_xcr0_is_valid(svm)) { 2624 vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb); 2625 kvm_update_cpuid_runtime(vcpu); 2626 } 2627 2628 /* Copy the GHCB exit information into the VMCB fields */ 2629 exit_code = ghcb_get_sw_exit_code(ghcb); 2630 control->exit_code = lower_32_bits(exit_code); 2631 control->exit_code_hi = upper_32_bits(exit_code); 2632 control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb); 2633 control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb); 2634 svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm, ghcb); 2635 2636 /* Clear the valid entries fields */ 2637 memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); 2638 } 2639 2640 static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control) 2641 { 2642 return (((u64)control->exit_code_hi) << 32) | control->exit_code; 2643 } 2644 2645 static int sev_es_validate_vmgexit(struct vcpu_svm *svm) 2646 { 2647 struct vmcb_control_area *control = &svm->vmcb->control; 2648 struct kvm_vcpu *vcpu = &svm->vcpu; 2649 u64 exit_code; 2650 u64 reason; 2651 2652 /* 2653 * Retrieve the exit code now even though it may not be marked valid 2654 * as it could help with debugging. 2655 */ 2656 exit_code = kvm_ghcb_get_sw_exit_code(control); 2657 2658 /* Only GHCB Usage code 0 is supported */ 2659 if (svm->sev_es.ghcb->ghcb_usage) { 2660 reason = GHCB_ERR_INVALID_USAGE; 2661 goto vmgexit_err; 2662 } 2663 2664 reason = GHCB_ERR_MISSING_INPUT; 2665 2666 if (!kvm_ghcb_sw_exit_code_is_valid(svm) || 2667 !kvm_ghcb_sw_exit_info_1_is_valid(svm) || 2668 !kvm_ghcb_sw_exit_info_2_is_valid(svm)) 2669 goto vmgexit_err; 2670 2671 switch (exit_code) { 2672 case SVM_EXIT_READ_DR7: 2673 break; 2674 case SVM_EXIT_WRITE_DR7: 2675 if (!kvm_ghcb_rax_is_valid(svm)) 2676 goto vmgexit_err; 2677 break; 2678 case SVM_EXIT_RDTSC: 2679 break; 2680 case SVM_EXIT_RDPMC: 2681 if (!kvm_ghcb_rcx_is_valid(svm)) 2682 goto vmgexit_err; 2683 break; 2684 case SVM_EXIT_CPUID: 2685 if (!kvm_ghcb_rax_is_valid(svm) || 2686 !kvm_ghcb_rcx_is_valid(svm)) 2687 goto vmgexit_err; 2688 if (vcpu->arch.regs[VCPU_REGS_RAX] == 0xd) 2689 if (!kvm_ghcb_xcr0_is_valid(svm)) 2690 goto vmgexit_err; 2691 break; 2692 case SVM_EXIT_INVD: 2693 break; 2694 case SVM_EXIT_IOIO: 2695 if (control->exit_info_1 & SVM_IOIO_STR_MASK) { 2696 if (!kvm_ghcb_sw_scratch_is_valid(svm)) 2697 goto vmgexit_err; 2698 } else { 2699 if (!(control->exit_info_1 & SVM_IOIO_TYPE_MASK)) 2700 if (!kvm_ghcb_rax_is_valid(svm)) 2701 goto vmgexit_err; 2702 } 2703 break; 2704 case SVM_EXIT_MSR: 2705 if (!kvm_ghcb_rcx_is_valid(svm)) 2706 goto vmgexit_err; 2707 if (control->exit_info_1) { 2708 if (!kvm_ghcb_rax_is_valid(svm) || 2709 !kvm_ghcb_rdx_is_valid(svm)) 2710 goto vmgexit_err; 2711 } 2712 break; 2713 case SVM_EXIT_VMMCALL: 2714 if (!kvm_ghcb_rax_is_valid(svm) || 2715 !kvm_ghcb_cpl_is_valid(svm)) 2716 goto vmgexit_err; 2717 break; 2718 case SVM_EXIT_RDTSCP: 2719 break; 2720 case SVM_EXIT_WBINVD: 2721 break; 2722 case SVM_EXIT_MONITOR: 2723 if (!kvm_ghcb_rax_is_valid(svm) || 2724 !kvm_ghcb_rcx_is_valid(svm) || 2725 !kvm_ghcb_rdx_is_valid(svm)) 2726 goto vmgexit_err; 2727 break; 2728 case SVM_EXIT_MWAIT: 2729 if (!kvm_ghcb_rax_is_valid(svm) || 2730 !kvm_ghcb_rcx_is_valid(svm)) 2731 goto vmgexit_err; 2732 break; 2733 case SVM_VMGEXIT_MMIO_READ: 2734 case SVM_VMGEXIT_MMIO_WRITE: 2735 if (!kvm_ghcb_sw_scratch_is_valid(svm)) 2736 goto vmgexit_err; 2737 break; 2738 case SVM_VMGEXIT_NMI_COMPLETE: 2739 case SVM_VMGEXIT_AP_HLT_LOOP: 2740 case SVM_VMGEXIT_AP_JUMP_TABLE: 2741 case SVM_VMGEXIT_UNSUPPORTED_EVENT: 2742 case SVM_VMGEXIT_HV_FEATURES: 2743 case SVM_VMGEXIT_TERM_REQUEST: 2744 break; 2745 default: 2746 reason = GHCB_ERR_INVALID_EVENT; 2747 goto vmgexit_err; 2748 } 2749 2750 return 0; 2751 2752 vmgexit_err: 2753 if (reason == GHCB_ERR_INVALID_USAGE) { 2754 vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n", 2755 svm->sev_es.ghcb->ghcb_usage); 2756 } else if (reason == GHCB_ERR_INVALID_EVENT) { 2757 vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n", 2758 exit_code); 2759 } else { 2760 vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n", 2761 exit_code); 2762 dump_ghcb(svm); 2763 } 2764 2765 ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2); 2766 ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, reason); 2767 2768 /* Resume the guest to "return" the error code. */ 2769 return 1; 2770 } 2771 2772 void sev_es_unmap_ghcb(struct vcpu_svm *svm) 2773 { 2774 /* Clear any indication that the vCPU is in a type of AP Reset Hold */ 2775 svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NONE; 2776 2777 if (!svm->sev_es.ghcb) 2778 return; 2779 2780 if (svm->sev_es.ghcb_sa_free) { 2781 /* 2782 * The scratch area lives outside the GHCB, so there is a 2783 * buffer that, depending on the operation performed, may 2784 * need to be synced, then freed. 2785 */ 2786 if (svm->sev_es.ghcb_sa_sync) { 2787 kvm_write_guest(svm->vcpu.kvm, 2788 svm->sev_es.sw_scratch, 2789 svm->sev_es.ghcb_sa, 2790 svm->sev_es.ghcb_sa_len); 2791 svm->sev_es.ghcb_sa_sync = false; 2792 } 2793 2794 kvfree(svm->sev_es.ghcb_sa); 2795 svm->sev_es.ghcb_sa = NULL; 2796 svm->sev_es.ghcb_sa_free = false; 2797 } 2798 2799 trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb); 2800 2801 sev_es_sync_to_ghcb(svm); 2802 2803 kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true); 2804 svm->sev_es.ghcb = NULL; 2805 } 2806 2807 void pre_sev_run(struct vcpu_svm *svm, int cpu) 2808 { 2809 struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); 2810 unsigned int asid = sev_get_asid(svm->vcpu.kvm); 2811 2812 /* Assign the asid allocated with this SEV guest */ 2813 svm->asid = asid; 2814 2815 /* 2816 * Flush guest TLB: 2817 * 2818 * 1) when different VMCB for the same ASID is to be run on the same host CPU. 2819 * 2) or this VMCB was executed on different host CPU in previous VMRUNs. 2820 */ 2821 if (sd->sev_vmcbs[asid] == svm->vmcb && 2822 svm->vcpu.arch.last_vmentry_cpu == cpu) 2823 return; 2824 2825 sd->sev_vmcbs[asid] = svm->vmcb; 2826 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; 2827 vmcb_mark_dirty(svm->vmcb, VMCB_ASID); 2828 } 2829 2830 #define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) 2831 static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) 2832 { 2833 struct vmcb_control_area *control = &svm->vmcb->control; 2834 u64 ghcb_scratch_beg, ghcb_scratch_end; 2835 u64 scratch_gpa_beg, scratch_gpa_end; 2836 void *scratch_va; 2837 2838 scratch_gpa_beg = svm->sev_es.sw_scratch; 2839 if (!scratch_gpa_beg) { 2840 pr_err("vmgexit: scratch gpa not provided\n"); 2841 goto e_scratch; 2842 } 2843 2844 scratch_gpa_end = scratch_gpa_beg + len; 2845 if (scratch_gpa_end < scratch_gpa_beg) { 2846 pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n", 2847 len, scratch_gpa_beg); 2848 goto e_scratch; 2849 } 2850 2851 if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) { 2852 /* Scratch area begins within GHCB */ 2853 ghcb_scratch_beg = control->ghcb_gpa + 2854 offsetof(struct ghcb, shared_buffer); 2855 ghcb_scratch_end = control->ghcb_gpa + 2856 offsetof(struct ghcb, reserved_0xff0); 2857 2858 /* 2859 * If the scratch area begins within the GHCB, it must be 2860 * completely contained in the GHCB shared buffer area. 2861 */ 2862 if (scratch_gpa_beg < ghcb_scratch_beg || 2863 scratch_gpa_end > ghcb_scratch_end) { 2864 pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n", 2865 scratch_gpa_beg, scratch_gpa_end); 2866 goto e_scratch; 2867 } 2868 2869 scratch_va = (void *)svm->sev_es.ghcb; 2870 scratch_va += (scratch_gpa_beg - control->ghcb_gpa); 2871 } else { 2872 /* 2873 * The guest memory must be read into a kernel buffer, so 2874 * limit the size 2875 */ 2876 if (len > GHCB_SCRATCH_AREA_LIMIT) { 2877 pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n", 2878 len, GHCB_SCRATCH_AREA_LIMIT); 2879 goto e_scratch; 2880 } 2881 scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT); 2882 if (!scratch_va) 2883 return -ENOMEM; 2884 2885 if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { 2886 /* Unable to copy scratch area from guest */ 2887 pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); 2888 2889 kvfree(scratch_va); 2890 return -EFAULT; 2891 } 2892 2893 /* 2894 * The scratch area is outside the GHCB. The operation will 2895 * dictate whether the buffer needs to be synced before running 2896 * the vCPU next time (i.e. a read was requested so the data 2897 * must be written back to the guest memory). 2898 */ 2899 svm->sev_es.ghcb_sa_sync = sync; 2900 svm->sev_es.ghcb_sa_free = true; 2901 } 2902 2903 svm->sev_es.ghcb_sa = scratch_va; 2904 svm->sev_es.ghcb_sa_len = len; 2905 2906 return 0; 2907 2908 e_scratch: 2909 ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2); 2910 ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_SCRATCH_AREA); 2911 2912 return 1; 2913 } 2914 2915 static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, 2916 unsigned int pos) 2917 { 2918 svm->vmcb->control.ghcb_gpa &= ~(mask << pos); 2919 svm->vmcb->control.ghcb_gpa |= (value & mask) << pos; 2920 } 2921 2922 static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos) 2923 { 2924 return (svm->vmcb->control.ghcb_gpa >> pos) & mask; 2925 } 2926 2927 static void set_ghcb_msr(struct vcpu_svm *svm, u64 value) 2928 { 2929 svm->vmcb->control.ghcb_gpa = value; 2930 } 2931 2932 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) 2933 { 2934 struct vmcb_control_area *control = &svm->vmcb->control; 2935 struct kvm_vcpu *vcpu = &svm->vcpu; 2936 struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; 2937 u64 ghcb_info; 2938 int ret = 1; 2939 2940 ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK; 2941 2942 trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id, 2943 control->ghcb_gpa); 2944 2945 switch (ghcb_info) { 2946 case GHCB_MSR_SEV_INFO_REQ: 2947 set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version, 2948 GHCB_VERSION_MIN, 2949 sev_enc_bit)); 2950 break; 2951 case GHCB_MSR_CPUID_REQ: { 2952 u64 cpuid_fn, cpuid_reg, cpuid_value; 2953 2954 cpuid_fn = get_ghcb_msr_bits(svm, 2955 GHCB_MSR_CPUID_FUNC_MASK, 2956 GHCB_MSR_CPUID_FUNC_POS); 2957 2958 /* Initialize the registers needed by the CPUID intercept */ 2959 vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn; 2960 vcpu->arch.regs[VCPU_REGS_RCX] = 0; 2961 2962 ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID); 2963 if (!ret) { 2964 /* Error, keep GHCB MSR value as-is */ 2965 break; 2966 } 2967 2968 cpuid_reg = get_ghcb_msr_bits(svm, 2969 GHCB_MSR_CPUID_REG_MASK, 2970 GHCB_MSR_CPUID_REG_POS); 2971 if (cpuid_reg == 0) 2972 cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX]; 2973 else if (cpuid_reg == 1) 2974 cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX]; 2975 else if (cpuid_reg == 2) 2976 cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX]; 2977 else 2978 cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX]; 2979 2980 set_ghcb_msr_bits(svm, cpuid_value, 2981 GHCB_MSR_CPUID_VALUE_MASK, 2982 GHCB_MSR_CPUID_VALUE_POS); 2983 2984 set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP, 2985 GHCB_MSR_INFO_MASK, 2986 GHCB_MSR_INFO_POS); 2987 break; 2988 } 2989 case GHCB_MSR_AP_RESET_HOLD_REQ: 2990 svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_MSR_PROTO; 2991 ret = kvm_emulate_ap_reset_hold(&svm->vcpu); 2992 2993 /* 2994 * Preset the result to a non-SIPI return and then only set 2995 * the result to non-zero when delivering a SIPI. 2996 */ 2997 set_ghcb_msr_bits(svm, 0, 2998 GHCB_MSR_AP_RESET_HOLD_RESULT_MASK, 2999 GHCB_MSR_AP_RESET_HOLD_RESULT_POS); 3000 3001 set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP, 3002 GHCB_MSR_INFO_MASK, 3003 GHCB_MSR_INFO_POS); 3004 break; 3005 case GHCB_MSR_HV_FT_REQ: 3006 set_ghcb_msr_bits(svm, GHCB_HV_FT_SUPPORTED, 3007 GHCB_MSR_HV_FT_MASK, GHCB_MSR_HV_FT_POS); 3008 set_ghcb_msr_bits(svm, GHCB_MSR_HV_FT_RESP, 3009 GHCB_MSR_INFO_MASK, GHCB_MSR_INFO_POS); 3010 break; 3011 case GHCB_MSR_TERM_REQ: { 3012 u64 reason_set, reason_code; 3013 3014 reason_set = get_ghcb_msr_bits(svm, 3015 GHCB_MSR_TERM_REASON_SET_MASK, 3016 GHCB_MSR_TERM_REASON_SET_POS); 3017 reason_code = get_ghcb_msr_bits(svm, 3018 GHCB_MSR_TERM_REASON_MASK, 3019 GHCB_MSR_TERM_REASON_POS); 3020 pr_info("SEV-ES guest requested termination: %#llx:%#llx\n", 3021 reason_set, reason_code); 3022 3023 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; 3024 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SEV_TERM; 3025 vcpu->run->system_event.ndata = 1; 3026 vcpu->run->system_event.data[0] = control->ghcb_gpa; 3027 3028 return 0; 3029 } 3030 default: 3031 /* Error, keep GHCB MSR value as-is */ 3032 break; 3033 } 3034 3035 trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id, 3036 control->ghcb_gpa, ret); 3037 3038 return ret; 3039 } 3040 3041 int sev_handle_vmgexit(struct kvm_vcpu *vcpu) 3042 { 3043 struct vcpu_svm *svm = to_svm(vcpu); 3044 struct vmcb_control_area *control = &svm->vmcb->control; 3045 u64 ghcb_gpa, exit_code; 3046 int ret; 3047 3048 /* Validate the GHCB */ 3049 ghcb_gpa = control->ghcb_gpa; 3050 if (ghcb_gpa & GHCB_MSR_INFO_MASK) 3051 return sev_handle_vmgexit_msr_protocol(svm); 3052 3053 if (!ghcb_gpa) { 3054 vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n"); 3055 3056 /* Without a GHCB, just return right back to the guest */ 3057 return 1; 3058 } 3059 3060 if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) { 3061 /* Unable to map GHCB from guest */ 3062 vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n", 3063 ghcb_gpa); 3064 3065 /* Without a GHCB, just return right back to the guest */ 3066 return 1; 3067 } 3068 3069 svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva; 3070 3071 trace_kvm_vmgexit_enter(vcpu->vcpu_id, svm->sev_es.ghcb); 3072 3073 sev_es_sync_from_ghcb(svm); 3074 ret = sev_es_validate_vmgexit(svm); 3075 if (ret) 3076 return ret; 3077 3078 ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 0); 3079 ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 0); 3080 3081 exit_code = kvm_ghcb_get_sw_exit_code(control); 3082 switch (exit_code) { 3083 case SVM_VMGEXIT_MMIO_READ: 3084 ret = setup_vmgexit_scratch(svm, true, control->exit_info_2); 3085 if (ret) 3086 break; 3087 3088 ret = kvm_sev_es_mmio_read(vcpu, 3089 control->exit_info_1, 3090 control->exit_info_2, 3091 svm->sev_es.ghcb_sa); 3092 break; 3093 case SVM_VMGEXIT_MMIO_WRITE: 3094 ret = setup_vmgexit_scratch(svm, false, control->exit_info_2); 3095 if (ret) 3096 break; 3097 3098 ret = kvm_sev_es_mmio_write(vcpu, 3099 control->exit_info_1, 3100 control->exit_info_2, 3101 svm->sev_es.ghcb_sa); 3102 break; 3103 case SVM_VMGEXIT_NMI_COMPLETE: 3104 ++vcpu->stat.nmi_window_exits; 3105 svm->nmi_masked = false; 3106 kvm_make_request(KVM_REQ_EVENT, vcpu); 3107 ret = 1; 3108 break; 3109 case SVM_VMGEXIT_AP_HLT_LOOP: 3110 svm->sev_es.ap_reset_hold_type = AP_RESET_HOLD_NAE_EVENT; 3111 ret = kvm_emulate_ap_reset_hold(vcpu); 3112 break; 3113 case SVM_VMGEXIT_AP_JUMP_TABLE: { 3114 struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; 3115 3116 switch (control->exit_info_1) { 3117 case 0: 3118 /* Set AP jump table address */ 3119 sev->ap_jump_table = control->exit_info_2; 3120 break; 3121 case 1: 3122 /* Get AP jump table address */ 3123 ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, sev->ap_jump_table); 3124 break; 3125 default: 3126 pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n", 3127 control->exit_info_1); 3128 ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2); 3129 ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT); 3130 } 3131 3132 ret = 1; 3133 break; 3134 } 3135 case SVM_VMGEXIT_HV_FEATURES: 3136 ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_HV_FT_SUPPORTED); 3137 3138 ret = 1; 3139 break; 3140 case SVM_VMGEXIT_TERM_REQUEST: 3141 pr_info("SEV-ES guest requested termination: reason %#llx info %#llx\n", 3142 control->exit_info_1, control->exit_info_2); 3143 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; 3144 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_SEV_TERM; 3145 vcpu->run->system_event.ndata = 1; 3146 vcpu->run->system_event.data[0] = control->ghcb_gpa; 3147 break; 3148 case SVM_VMGEXIT_UNSUPPORTED_EVENT: 3149 vcpu_unimpl(vcpu, 3150 "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", 3151 control->exit_info_1, control->exit_info_2); 3152 ret = -EINVAL; 3153 break; 3154 default: 3155 ret = svm_invoke_exit_handler(vcpu, exit_code); 3156 } 3157 3158 return ret; 3159 } 3160 3161 int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) 3162 { 3163 int count; 3164 int bytes; 3165 int r; 3166 3167 if (svm->vmcb->control.exit_info_2 > INT_MAX) 3168 return -EINVAL; 3169 3170 count = svm->vmcb->control.exit_info_2; 3171 if (unlikely(check_mul_overflow(count, size, &bytes))) 3172 return -EINVAL; 3173 3174 r = setup_vmgexit_scratch(svm, in, bytes); 3175 if (r) 3176 return r; 3177 3178 return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa, 3179 count, in); 3180 } 3181 3182 static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm) 3183 { 3184 struct kvm_vcpu *vcpu = &svm->vcpu; 3185 3186 if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) { 3187 bool v_tsc_aux = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) || 3188 guest_cpuid_has(vcpu, X86_FEATURE_RDPID); 3189 3190 set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux); 3191 } 3192 3193 /* 3194 * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if 3195 * the host/guest supports its use. 3196 * 3197 * guest_can_use() checks a number of requirements on the host/guest to 3198 * ensure that MSR_IA32_XSS is available, but it might report true even 3199 * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host 3200 * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better 3201 * to further check that the guest CPUID actually supports 3202 * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved 3203 * guests will still get intercepted and caught in the normal 3204 * kvm_emulate_rdmsr()/kvm_emulated_wrmsr() paths. 3205 */ 3206 if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && 3207 guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) 3208 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1); 3209 else 3210 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0); 3211 } 3212 3213 void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) 3214 { 3215 struct kvm_vcpu *vcpu = &svm->vcpu; 3216 struct kvm_cpuid_entry2 *best; 3217 3218 /* For sev guests, the memory encryption bit is not reserved in CR3. */ 3219 best = kvm_find_cpuid_entry(vcpu, 0x8000001F); 3220 if (best) 3221 vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f)); 3222 3223 if (sev_es_guest(svm->vcpu.kvm)) 3224 sev_es_vcpu_after_set_cpuid(svm); 3225 } 3226 3227 static void sev_es_init_vmcb(struct vcpu_svm *svm) 3228 { 3229 struct vmcb *vmcb = svm->vmcb01.ptr; 3230 struct kvm_vcpu *vcpu = &svm->vcpu; 3231 3232 svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE; 3233 3234 /* 3235 * An SEV-ES guest requires a VMSA area that is a separate from the 3236 * VMCB page. Do not include the encryption mask on the VMSA physical 3237 * address since hardware will access it using the guest key. Note, 3238 * the VMSA will be NULL if this vCPU is the destination for intrahost 3239 * migration, and will be copied later. 3240 */ 3241 if (svm->sev_es.vmsa) 3242 svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); 3243 3244 /* Can't intercept CR register access, HV can't modify CR registers */ 3245 svm_clr_intercept(svm, INTERCEPT_CR0_READ); 3246 svm_clr_intercept(svm, INTERCEPT_CR4_READ); 3247 svm_clr_intercept(svm, INTERCEPT_CR8_READ); 3248 svm_clr_intercept(svm, INTERCEPT_CR0_WRITE); 3249 svm_clr_intercept(svm, INTERCEPT_CR4_WRITE); 3250 svm_clr_intercept(svm, INTERCEPT_CR8_WRITE); 3251 3252 svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0); 3253 3254 /* Track EFER/CR register changes */ 3255 svm_set_intercept(svm, TRAP_EFER_WRITE); 3256 svm_set_intercept(svm, TRAP_CR0_WRITE); 3257 svm_set_intercept(svm, TRAP_CR4_WRITE); 3258 svm_set_intercept(svm, TRAP_CR8_WRITE); 3259 3260 vmcb->control.intercepts[INTERCEPT_DR] = 0; 3261 if (!sev_vcpu_has_debug_swap(svm)) { 3262 vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); 3263 vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); 3264 recalc_intercepts(svm); 3265 } else { 3266 /* 3267 * Disable #DB intercept iff DebugSwap is enabled. KVM doesn't 3268 * allow debugging SEV-ES guests, and enables DebugSwap iff 3269 * NO_NESTED_DATA_BP is supported, so there's no reason to 3270 * intercept #DB when DebugSwap is enabled. For simplicity 3271 * with respect to guest debug, intercept #DB for other VMs 3272 * even if NO_NESTED_DATA_BP is supported, i.e. even if the 3273 * guest can't DoS the CPU with infinite #DB vectoring. 3274 */ 3275 clr_exception_intercept(svm, DB_VECTOR); 3276 } 3277 3278 /* Can't intercept XSETBV, HV can't modify XCR0 directly */ 3279 svm_clr_intercept(svm, INTERCEPT_XSETBV); 3280 3281 /* Clear intercepts on selected MSRs */ 3282 set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1); 3283 set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1); 3284 } 3285 3286 void sev_init_vmcb(struct vcpu_svm *svm) 3287 { 3288 svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; 3289 clr_exception_intercept(svm, UD_VECTOR); 3290 3291 /* 3292 * Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as 3293 * KVM can't decrypt guest memory to decode the faulting instruction. 3294 */ 3295 clr_exception_intercept(svm, GP_VECTOR); 3296 3297 if (sev_es_guest(svm->vcpu.kvm)) 3298 sev_es_init_vmcb(svm); 3299 } 3300 3301 void sev_es_vcpu_reset(struct vcpu_svm *svm) 3302 { 3303 struct kvm_vcpu *vcpu = &svm->vcpu; 3304 struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info; 3305 3306 /* 3307 * Set the GHCB MSR value as per the GHCB specification when emulating 3308 * vCPU RESET for an SEV-ES guest. 3309 */ 3310 set_ghcb_msr(svm, GHCB_MSR_SEV_INFO((__u64)sev->ghcb_version, 3311 GHCB_VERSION_MIN, 3312 sev_enc_bit)); 3313 } 3314 3315 void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa) 3316 { 3317 /* 3318 * All host state for SEV-ES guests is categorized into three swap types 3319 * based on how it is handled by hardware during a world switch: 3320 * 3321 * A: VMRUN: Host state saved in host save area 3322 * VMEXIT: Host state loaded from host save area 3323 * 3324 * B: VMRUN: Host state _NOT_ saved in host save area 3325 * VMEXIT: Host state loaded from host save area 3326 * 3327 * C: VMRUN: Host state _NOT_ saved in host save area 3328 * VMEXIT: Host state initialized to default(reset) values 3329 * 3330 * Manually save type-B state, i.e. state that is loaded by VMEXIT but 3331 * isn't saved by VMRUN, that isn't already saved by VMSAVE (performed 3332 * by common SVM code). 3333 */ 3334 hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 3335 hostsa->pkru = read_pkru(); 3336 hostsa->xss = host_xss; 3337 3338 /* 3339 * If DebugSwap is enabled, debug registers are loaded but NOT saved by 3340 * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both 3341 * saves and loads debug registers (Type-A). 3342 */ 3343 if (sev_vcpu_has_debug_swap(svm)) { 3344 hostsa->dr0 = native_get_debugreg(0); 3345 hostsa->dr1 = native_get_debugreg(1); 3346 hostsa->dr2 = native_get_debugreg(2); 3347 hostsa->dr3 = native_get_debugreg(3); 3348 hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0); 3349 hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1); 3350 hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2); 3351 hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3); 3352 } 3353 } 3354 3355 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) 3356 { 3357 struct vcpu_svm *svm = to_svm(vcpu); 3358 3359 /* First SIPI: Use the values as initially set by the VMM */ 3360 if (!svm->sev_es.received_first_sipi) { 3361 svm->sev_es.received_first_sipi = true; 3362 return; 3363 } 3364 3365 /* Subsequent SIPI */ 3366 switch (svm->sev_es.ap_reset_hold_type) { 3367 case AP_RESET_HOLD_NAE_EVENT: 3368 /* 3369 * Return from an AP Reset Hold VMGEXIT, where the guest will 3370 * set the CS and RIP. Set SW_EXIT_INFO_2 to a non-zero value. 3371 */ 3372 ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1); 3373 break; 3374 case AP_RESET_HOLD_MSR_PROTO: 3375 /* 3376 * Return from an AP Reset Hold VMGEXIT, where the guest will 3377 * set the CS and RIP. Set GHCB data field to a non-zero value. 3378 */ 3379 set_ghcb_msr_bits(svm, 1, 3380 GHCB_MSR_AP_RESET_HOLD_RESULT_MASK, 3381 GHCB_MSR_AP_RESET_HOLD_RESULT_POS); 3382 3383 set_ghcb_msr_bits(svm, GHCB_MSR_AP_RESET_HOLD_RESP, 3384 GHCB_MSR_INFO_MASK, 3385 GHCB_MSR_INFO_POS); 3386 break; 3387 default: 3388 break; 3389 } 3390 } 3391 3392 struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) 3393 { 3394 unsigned long pfn; 3395 struct page *p; 3396 3397 if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) 3398 return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); 3399 3400 /* 3401 * Allocate an SNP-safe page to workaround the SNP erratum where 3402 * the CPU will incorrectly signal an RMP violation #PF if a 3403 * hugepage (2MB or 1GB) collides with the RMP entry of a 3404 * 2MB-aligned VMCB, VMSA, or AVIC backing page. 3405 * 3406 * Allocate one extra page, choose a page which is not 3407 * 2MB-aligned, and free the other. 3408 */ 3409 p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1); 3410 if (!p) 3411 return NULL; 3412 3413 split_page(p, 1); 3414 3415 pfn = page_to_pfn(p); 3416 if (IS_ALIGNED(pfn, PTRS_PER_PMD)) 3417 __free_page(p++); 3418 else 3419 __free_page(p + 1); 3420 3421 return p; 3422 } 3423