// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM-SEV support
 *
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 */

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/psp-sev.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/processor.h>
#include <linux/trace_events.h>
#include <asm/fpu/internal.h>

#include <asm/trapnr.h>

#include "x86.h"
#include "svm.h"
#include "cpuid.h"
#include "trace.h"

#define __ex(x) __kvm_handle_fault_on_reboot(x)

static u8 sev_enc_bit;
static int sev_flush_asids(void);
static DECLARE_RWSEM(sev_deactivate_lock);
static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;

struct enc_region {
	struct list_head list;
	unsigned long npages;
	struct page **pages;
	unsigned long uaddr;
	unsigned long size;
};

static int sev_flush_asids(void)
{
	int ret, error = 0;

	/*
	 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
	 * so it must be guarded.
	 */
	down_write(&sev_deactivate_lock);

	wbinvd_on_all_cpus();
	ret = sev_guest_df_flush(&error);

	up_write(&sev_deactivate_lock);

	if (ret)
		pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);

	return ret;
}

/* Must be called with the sev_bitmap_lock held */
static bool __sev_recycle_asids(int min_asid, int max_asid)
{
	int pos;

	/* Check if there are any ASIDs to reclaim before performing a flush */
	pos = find_next_bit(sev_reclaim_asid_bitmap, max_sev_asid, min_asid);
	if (pos >= max_asid)
		return false;

	if (sev_flush_asids())
		return false;

	/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
	bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
		   max_sev_asid);
	bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);

	return true;
}

static int sev_asid_new(struct kvm_sev_info *sev)
{
	int pos, min_asid, max_asid;
	bool retry = true;

	mutex_lock(&sev_bitmap_lock);

	/*
	 * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
	 * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
	 */
	min_asid = sev->es_active ? 0 : min_sev_asid - 1;
	max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
	pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
	if (pos >= max_asid) {
		if (retry && __sev_recycle_asids(min_asid, max_asid)) {
			retry = false;
			goto again;
		}
		mutex_unlock(&sev_bitmap_lock);
		return -EBUSY;
	}

	__set_bit(pos, sev_asid_bitmap);

	mutex_unlock(&sev_bitmap_lock);

	return pos + 1;
}

static int sev_get_asid(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return sev->asid;
}

static void sev_asid_free(int asid)
{
	struct svm_cpu_data *sd;
	int cpu, pos;

	mutex_lock(&sev_bitmap_lock);

	pos = asid - 1;
	__set_bit(pos, sev_reclaim_asid_bitmap);

	for_each_possible_cpu(cpu) {
		sd = per_cpu(svm_data, cpu);
		sd->sev_vmcbs[pos] = NULL;
	}

	mutex_unlock(&sev_bitmap_lock);
}

static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
{
	struct sev_data_decommission *decommission;
	struct sev_data_deactivate *data;

	if (!handle)
		return;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return;

	/* deactivate handle */
	data->handle = handle;

	/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
	down_read(&sev_deactivate_lock);
	sev_guest_deactivate(data, NULL);
	up_read(&sev_deactivate_lock);

	kfree(data);

	decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
	if (!decommission)
		return;

	/* decommission handle */
	decommission->handle = handle;
	sev_guest_decommission(decommission, NULL);

	kfree(decommission);
}

static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	int asid, ret;

	ret = -EBUSY;
	if (unlikely(sev->active))
		return ret;

	asid = sev_asid_new(sev);
	if (asid < 0)
		return ret;

	ret = sev_platform_init(&argp->error);
	if (ret)
		goto e_free;

	sev->active = true;
	sev->asid = asid;
	INIT_LIST_HEAD(&sev->regions_list);

	return 0;

e_free:
	sev_asid_free(asid);
	return ret;
}

static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	if (!sev_es)
		return -ENOTTY;

	to_kvm_svm(kvm)->sev_info.es_active = true;

	return sev_guest_init(kvm, argp);
}

static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
	struct sev_data_activate *data;
	int asid = sev_get_asid(kvm);
	int ret;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	/* activate ASID on the given handle */
	data->handle = handle;
	data->asid = asid;
	ret = sev_guest_activate(data, error);
	kfree(data);

	return ret;
}

static int __sev_issue_cmd(int fd, int id, void *data, int *error)
{
	struct fd f;
	int ret;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	ret = sev_issue_cmd_external_user(f.file, id, data, error);

	fdput(f);
	return ret;
}

static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return __sev_issue_cmd(sev->fd, id, data, error);
}

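/*
 * Illustrative sketch (not part of the driver proper, shown only to give
 * context for the launch commands below): userspace reaches these handlers
 * through the KVM_MEMORY_ENCRYPT_OP ioctl on the VM file descriptor, with a
 * struct kvm_sev_cmd selecting the operation, roughly:
 *
 *	struct kvm_sev_cmd cmd = { .id = KVM_SEV_INIT, .sev_fd = sev_fd };
 *	ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);	// sev_guest_init()
 *
 *	struct kvm_sev_launch_start ls = { .policy = policy };
 *	cmd.id = KVM_SEV_LAUNCH_START;
 *	cmd.data = (__u64)&ls;
 *	ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);	// sev_launch_start()
 *
 * followed by KVM_SEV_LAUNCH_UPDATE_DATA, KVM_SEV_LAUNCH_MEASURE and
 * KVM_SEV_LAUNCH_FINISH (see svm_mem_enc_op()).  Firmware error codes are
 * returned in cmd.error; see the uapi headers for the authoritative
 * structure layouts.
 */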
static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_start *start;
	struct kvm_sev_launch_start params;
	void *dh_blob, *session_blob;
	int *error = &argp->error;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
	if (!start)
		return -ENOMEM;

	dh_blob = NULL;
	if (params.dh_uaddr) {
		dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
		if (IS_ERR(dh_blob)) {
			ret = PTR_ERR(dh_blob);
			goto e_free;
		}

		start->dh_cert_address = __sme_set(__pa(dh_blob));
		start->dh_cert_len = params.dh_len;
	}

	session_blob = NULL;
	if (params.session_uaddr) {
		session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
		if (IS_ERR(session_blob)) {
			ret = PTR_ERR(session_blob);
			goto e_free_dh;
		}

		start->session_address = __sme_set(__pa(session_blob));
		start->session_len = params.session_len;
	}

	start->handle = params.handle;
	start->policy = params.policy;

	/* create memory encryption context */
	ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
	if (ret)
		goto e_free_session;

	/* Bind ASID to this guest */
	ret = sev_bind_asid(kvm, start->handle, error);
	if (ret)
		goto e_free_session;

	/* return handle to userspace */
	params.handle = start->handle;
	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
		sev_unbind_asid(kvm, start->handle);
		ret = -EFAULT;
		goto e_free_session;
	}

	sev->handle = start->handle;
	sev->fd = argp->sev_fd;

e_free_session:
	kfree(session_blob);
e_free_dh:
	kfree(dh_blob);
e_free:
	kfree(start);
	return ret;
}

static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
				    unsigned long ulen, unsigned long *n,
				    int write)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	unsigned long npages, size;
	int npinned;
	unsigned long locked, lock_limit;
	struct page **pages;
	unsigned long first, last;
	int ret;

	lockdep_assert_held(&kvm->lock);

	if (ulen == 0 || uaddr + ulen < uaddr)
		return ERR_PTR(-EINVAL);

	/* Calculate number of pages. */
	first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
	last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
	npages = (last - first + 1);

	locked = sev->pages_locked + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
		pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
		return ERR_PTR(-ENOMEM);
	}

	if (WARN_ON_ONCE(npages > INT_MAX))
		return ERR_PTR(-EINVAL);

	/* Avoid using vmalloc for smaller buffers. */
	size = npages * sizeof(struct page *);
	if (size > PAGE_SIZE)
		pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	else
		pages = kmalloc(size, GFP_KERNEL_ACCOUNT);

	if (!pages)
		return ERR_PTR(-ENOMEM);

	/* Pin the user virtual address. */
	npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
	if (npinned != npages) {
		pr_err("SEV: Failure locking %lu pages.\n", npages);
		ret = -ENOMEM;
		goto err;
	}

	*n = npages;
	sev->pages_locked = locked;

	return pages;

err:
	if (npinned > 0)
		unpin_user_pages(pages, npinned);

	kvfree(pages);
	return ERR_PTR(ret);
}

static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
			     unsigned long npages)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	unpin_user_pages(pages, npages);
	kvfree(pages);
	sev->pages_locked -= npages;
}

static void sev_clflush_pages(struct page *pages[], unsigned long npages)
{
	uint8_t *page_virtual;
	unsigned long i;

	if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
	    pages == NULL)
		return;

	for (i = 0; i < npages; i++) {
		page_virtual = kmap_atomic(pages[i]);
		clflush_cache_range(page_virtual, PAGE_SIZE);
		kunmap_atomic(page_virtual);
	}
}

static unsigned long get_num_contig_pages(unsigned long idx,
					  struct page **inpages, unsigned long npages)
{
	unsigned long paddr, next_paddr;
	unsigned long i = idx + 1, pages = 1;

	/* find the number of contiguous pages starting from idx */
	paddr = __sme_page_pa(inpages[idx]);
	while (i < npages) {
		next_paddr = __sme_page_pa(inpages[i++]);
		if ((paddr + PAGE_SIZE) == next_paddr) {
			pages++;
			paddr = next_paddr;
			continue;
		}
		break;
	}

	return pages;
}

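/*
 * Worked example for get_num_contig_pages() above (illustration only): if
 * inpages[] holds four pinned pages whose system-physical addresses are
 * P, P + 4K, P + 8K and then some unrelated Q, get_num_contig_pages(0, ...)
 * returns 3 and get_num_contig_pages(3, ...) returns 1.
 * sev_launch_update_data() below relies on this to cover each physically
 * contiguous run with a single LAUNCH_UPDATE_DATA command.
 */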
static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_launch_update_data params;
	struct sev_data_launch_update_data *data;
	struct page **inpages;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	vaddr = params.uaddr;
	size = params.len;
	vaddr_end = vaddr + size;

	/* Lock the user memory. */
	inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
	if (IS_ERR(inpages)) {
		ret = PTR_ERR(inpages);
		goto e_free;
	}

	/*
	 * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
	 * place; the cache may contain the data that was written unencrypted.
	 */
	sev_clflush_pages(inpages, npages);

	for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
		int offset, len;

		/*
		 * If the user buffer is not page-aligned, calculate the offset
		 * within the page.
		 */
		offset = vaddr & (PAGE_SIZE - 1);

		/* Calculate the number of pages that can be encrypted in one go. */
		pages = get_num_contig_pages(i, inpages, npages);

		len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);

		data->handle = sev->handle;
		data->len = len;
		data->address = __sme_page_pa(inpages[i]) + offset;
		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
		if (ret)
			goto e_unpin;

		size -= len;
		next_vaddr = vaddr + len;
	}

e_unpin:
	/* content of memory is updated, mark pages dirty */
	for (i = 0; i < npages; i++) {
		set_page_dirty_lock(inpages[i]);
		mark_page_accessed(inpages[i]);
	}
	/* unlock the user pages */
	sev_unpin_memory(kvm, inpages, npages);
e_free:
	kfree(data);
	return ret;
}

static int sev_es_sync_vmsa(struct vcpu_svm *svm)
{
	struct vmcb_save_area *save = &svm->vmcb->save;

	/* Check some debug related fields before encrypting the VMSA */
	if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
		return -EINVAL;

	/* Sync registers */
	save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
	save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
	save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
	save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
	save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
	save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
	save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];
#ifdef CONFIG_X86_64
	save->r8 = svm->vcpu.arch.regs[VCPU_REGS_R8];
	save->r9 = svm->vcpu.arch.regs[VCPU_REGS_R9];
	save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
	save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
	save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
	save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
	save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
	save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
#endif
	save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];

	/* Sync some non-GPR registers before encrypting */
	save->xcr0 = svm->vcpu.arch.xcr0;
	save->pkru = svm->vcpu.arch.pkru;
	save->xss = svm->vcpu.arch.ia32_xss;

	/*
	 * SEV-ES will use a VMSA that is pointed to by the VMCB, not
	 * the traditional VMSA that is part of the VMCB. Copy the
	 * traditional VMSA as it has been built so far (in prep
	 * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
	 */
	memcpy(svm->vmsa, save, sizeof(*save));

	return 0;
}

static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_update_vmsa *vmsa;
	int i, ret;

	if (!sev_es_guest(kvm))
		return -ENOTTY;

	vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL);
	if (!vmsa)
		return -ENOMEM;

	for (i = 0; i < kvm->created_vcpus; i++) {
		struct vcpu_svm *svm = to_svm(kvm->vcpus[i]);

		/* Perform some pre-encryption checks against the VMSA */
		ret = sev_es_sync_vmsa(svm);
		if (ret)
			goto e_free;

		/*
		 * The LAUNCH_UPDATE_VMSA command will perform in-place
		 * encryption of the VMSA memory content (i.e. it will write
		 * the same memory region with the guest's key), so invalidate
		 * it first.
		 */
		clflush_cache_range(svm->vmsa, PAGE_SIZE);

		vmsa->handle = sev->handle;
		vmsa->address = __sme_pa(svm->vmsa);
		vmsa->len = PAGE_SIZE;
		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa,
				    &argp->error);
		if (ret)
			goto e_free;

		svm->vcpu.arch.guest_state_protected = true;
	}

e_free:
	kfree(vmsa);
	return ret;
}

static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	void __user *measure = (void __user *)(uintptr_t)argp->data;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_measure *data;
	struct kvm_sev_launch_measure params;
	void __user *p = NULL;
	void *blob = NULL;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, measure, sizeof(params)))
		return -EFAULT;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	/* User wants to query the blob length */
	if (!params.len)
		goto cmd;

	p = (void __user *)(uintptr_t)params.uaddr;
	if (p) {
		if (params.len > SEV_FW_BLOB_MAX_SIZE) {
			ret = -EINVAL;
			goto e_free;
		}

		ret = -ENOMEM;
		blob = kmalloc(params.len, GFP_KERNEL);
		if (!blob)
			goto e_free;

		data->address = __psp_pa(blob);
		data->len = params.len;
	}

cmd:
	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);

	/*
	 * If we only queried the measurement length, the FW responded with the
	 * expected data (the required length), so skip the error check and
	 * just return the length to userspace.
	 */
	if (!params.len)
		goto done;

	if (ret)
		goto e_free_blob;

	if (blob) {
		if (copy_to_user(p, blob, params.len))
			ret = -EFAULT;
	}

done:
	params.len = data->len;
	if (copy_to_user(measure, &params, sizeof(params)))
		ret = -EFAULT;
e_free_blob:
	kfree(blob);
e_free:
	kfree(data);
	return ret;
}

static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_finish *data;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);

	kfree(data);
	return ret;
}

static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_guest_status params;
	struct sev_data_guest_status *data;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
	if (ret)
		goto e_free;

	params.policy = data->policy;
	params.state = data->state;
	params.handle = data->handle;

	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
		ret = -EFAULT;
e_free:
	kfree(data);
	return ret;
}

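/*
 * Debug copy helpers.  As the rounding and IS_ALIGNED() checks below show,
 * the DBG_DECRYPT/DBG_ENCRYPT firmware commands are issued on 16-byte
 * aligned addresses and lengths; unaligned user requests are therefore
 * bounced through an intermediate page (and, for encryption, handled as a
 * read-modify-write of the destination).
 */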
static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
			       unsigned long dst, int size,
			       int *error, bool enc)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_dbg *data;
	int ret;

	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		return -ENOMEM;

	data->handle = sev->handle;
	data->dst_addr = dst;
	data->src_addr = src;
	data->len = size;

	ret = sev_issue_cmd(kvm,
			    enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
			    data, error);
	kfree(data);
	return ret;
}

static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
			     unsigned long dst_paddr, int sz, int *err)
{
	int offset;

	/*
	 * It's safe to read more than we were asked to; the caller should
	 * ensure that the destination has enough space.
	 */
	offset = src_paddr & 15;
	src_paddr = round_down(src_paddr, 16);
	sz = round_up(sz + offset, 16);

	return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
}

static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
				  unsigned long __user dst_uaddr,
				  unsigned long dst_paddr,
				  int size, int *err)
{
	struct page *tpage = NULL;
	int ret, offset;

	/* If the inputs are not 16-byte aligned then use an intermediate buffer */
	if (!IS_ALIGNED(dst_paddr, 16) ||
	    !IS_ALIGNED(paddr, 16) ||
	    !IS_ALIGNED(size, 16)) {
		tpage = (void *)alloc_page(GFP_KERNEL);
		if (!tpage)
			return -ENOMEM;

		dst_paddr = __sme_page_pa(tpage);
	}

	ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
	if (ret)
		goto e_free;

	if (tpage) {
		offset = paddr & 15;
		if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
				 page_address(tpage) + offset, size))
			ret = -EFAULT;
	}

e_free:
	if (tpage)
		__free_page(tpage);

	return ret;
}

static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
				  unsigned long __user vaddr,
				  unsigned long dst_paddr,
				  unsigned long __user dst_vaddr,
				  int size, int *error)
{
	struct page *src_tpage = NULL;
	struct page *dst_tpage = NULL;
	int ret, len = size;

	/* If source buffer is not aligned then use an intermediate buffer */
	if (!IS_ALIGNED(vaddr, 16)) {
		src_tpage = alloc_page(GFP_KERNEL);
		if (!src_tpage)
			return -ENOMEM;

		if (copy_from_user(page_address(src_tpage),
				   (void __user *)(uintptr_t)vaddr, size)) {
			__free_page(src_tpage);
			return -EFAULT;
		}

		paddr = __sme_page_pa(src_tpage);
	}

	/*
	 * If destination buffer or length is not aligned then do read-modify-write:
	 * - decrypt destination in an intermediate buffer
	 * - copy the source buffer in an intermediate buffer
	 * - use the intermediate buffer as source buffer
	 */
	if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
		int dst_offset;

		dst_tpage = alloc_page(GFP_KERNEL);
		if (!dst_tpage) {
			ret = -ENOMEM;
			goto e_free;
		}

		ret = __sev_dbg_decrypt(kvm, dst_paddr,
					__sme_page_pa(dst_tpage), size, error);
		if (ret)
			goto e_free;

		/*
		 * If source is kernel buffer then use memcpy() otherwise
		 * copy_from_user().
		 */
		dst_offset = dst_paddr & 15;

		if (src_tpage)
			memcpy(page_address(dst_tpage) + dst_offset,
			       page_address(src_tpage), size);
		else {
			if (copy_from_user(page_address(dst_tpage) + dst_offset,
					   (void __user *)(uintptr_t)vaddr, size)) {
				ret = -EFAULT;
				goto e_free;
			}
		}

		paddr = __sme_page_pa(dst_tpage);
		dst_paddr = round_down(dst_paddr, 16);
		len = round_up(size, 16);
	}

	ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);

e_free:
	if (src_tpage)
		__free_page(src_tpage);
	if (dst_tpage)
		__free_page(dst_tpage);
	return ret;
}

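/*
 * Illustrative sketch (not part of the driver proper): sev_dbg_crypt() below
 * backs the KVM_SEV_DBG_DECRYPT/KVM_SEV_DBG_ENCRYPT operations, which
 * userspace issues roughly as:
 *
 *	struct kvm_sev_dbg dbg = {
 *		.src_uaddr = (__u64)src_buf,
 *		.dst_uaddr = (__u64)dst_buf,
 *		.len = len,
 *	};
 *	struct kvm_sev_cmd cmd = { .id = KVM_SEV_DBG_DECRYPT,
 *				   .data = (__u64)&dbg, .sev_fd = sev_fd };
 *	ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
 *
 * The firmware honours the DBG commands only if the guest policy permits
 * debugging.
 */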
static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
{
	unsigned long vaddr, vaddr_end, next_vaddr;
	unsigned long dst_vaddr;
	struct page **src_p, **dst_p;
	struct kvm_sev_dbg debug;
	unsigned long n;
	unsigned int size;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
		return -EFAULT;

	if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
		return -EINVAL;
	if (!debug.dst_uaddr)
		return -EINVAL;

	vaddr = debug.src_uaddr;
	size = debug.len;
	vaddr_end = vaddr + size;
	dst_vaddr = debug.dst_uaddr;

	for (; vaddr < vaddr_end; vaddr = next_vaddr) {
		int len, s_off, d_off;

		/* lock userspace source and destination page */
		src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
		if (IS_ERR(src_p))
			return PTR_ERR(src_p);

		dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
		if (IS_ERR(dst_p)) {
			sev_unpin_memory(kvm, src_p, n);
			return PTR_ERR(dst_p);
		}

		/*
		 * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
		 * the pages; flush the destination too so that future accesses do not
		 * see stale data.
		 */
		sev_clflush_pages(src_p, 1);
		sev_clflush_pages(dst_p, 1);

		/*
		 * Since user buffer may not be page aligned, calculate the
		 * offset within the page.
		 */
		s_off = vaddr & ~PAGE_MASK;
		d_off = dst_vaddr & ~PAGE_MASK;
		len = min_t(size_t, (PAGE_SIZE - s_off), size);

		if (dec)
			ret = __sev_dbg_decrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     dst_vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     len, &argp->error);
		else
			ret = __sev_dbg_encrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     dst_vaddr,
						     len, &argp->error);

		sev_unpin_memory(kvm, src_p, n);
		sev_unpin_memory(kvm, dst_p, n);

		if (ret)
			goto err;

		next_vaddr = vaddr + len;
		dst_vaddr = dst_vaddr + len;
		size -= len;
	}
err:
	return ret;
}

static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_secret *data;
	struct kvm_sev_launch_secret params;
	struct page **pages;
	void *blob, *hdr;
	unsigned long n, i;
	int ret, offset;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/*
	 * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
	 * place; the cache may contain the data that was written unencrypted.
	 */
	sev_clflush_pages(pages, n);

	/*
	 * The secret must be copied into a contiguous memory region, so let's
	 * verify that the userspace memory pages are contiguous before issuing
	 * the command.
	 */
	if (get_num_contig_pages(0, pages, n) != n) {
		ret = -EINVAL;
		goto e_unpin_memory;
	}

	ret = -ENOMEM;
	data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
	if (!data)
		goto e_unpin_memory;

	offset = params.guest_uaddr & (PAGE_SIZE - 1);
	data->guest_address = __sme_page_pa(pages[0]) + offset;
	data->guest_len = params.guest_len;

	blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
	if (IS_ERR(blob)) {
		ret = PTR_ERR(blob);
		goto e_free;
	}

	data->trans_address = __psp_pa(blob);
	data->trans_len = params.trans_len;

	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
	if (IS_ERR(hdr)) {
		ret = PTR_ERR(hdr);
		goto e_free_blob;
	}
	data->hdr_address = __psp_pa(hdr);
	data->hdr_len = params.hdr_len;

	data->handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);

	kfree(hdr);

e_free_blob:
	kfree(blob);
e_free:
	kfree(data);
e_unpin_memory:
	/* content of memory is updated, mark pages dirty */
	for (i = 0; i < n; i++) {
		set_page_dirty_lock(pages[i]);
		mark_page_accessed(pages[i]);
	}
	sev_unpin_memory(kvm, pages, n);
	return ret;
}

int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
	struct kvm_sev_cmd sev_cmd;
	int r;

	if (!svm_sev_enabled() || !sev)
		return -ENOTTY;

	if (!argp)
		return 0;

	if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
		return -EFAULT;

	mutex_lock(&kvm->lock);

	switch (sev_cmd.id) {
	case KVM_SEV_INIT:
		r = sev_guest_init(kvm, &sev_cmd);
		break;
	case KVM_SEV_ES_INIT:
		r = sev_es_guest_init(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_START:
		r = sev_launch_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_UPDATE_DATA:
		r = sev_launch_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_UPDATE_VMSA:
		r = sev_launch_update_vmsa(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_MEASURE:
		r = sev_launch_measure(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_FINISH:
		r = sev_launch_finish(kvm, &sev_cmd);
		break;
	case KVM_SEV_GUEST_STATUS:
		r = sev_guest_status(kvm, &sev_cmd);
		break;
	case KVM_SEV_DBG_DECRYPT:
		r = sev_dbg_crypt(kvm, &sev_cmd, true);
		break;
	case KVM_SEV_DBG_ENCRYPT:
		r = sev_dbg_crypt(kvm, &sev_cmd, false);
		break;
	case KVM_SEV_LAUNCH_SECRET:
		r = sev_launch_secret(kvm, &sev_cmd);
		break;
	default:
		r = -EINVAL;
		goto out;
	}

	if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
		r = -EFAULT;

out:
	mutex_unlock(&kvm->lock);
	return r;
}

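/*
 * Illustrative sketch (not part of the driver proper): the region list
 * managed by svm_register_enc_region() below is populated from userspace via
 * the KVM_MEMORY_ENCRYPT_REG_REGION/KVM_MEMORY_ENCRYPT_UNREG_REGION ioctls,
 * roughly:
 *
 *	struct kvm_enc_region region = {
 *		.addr = (__u64)guest_ram,
 *		.size = guest_ram_size,
 *	};
 *	ioctl(vm_fd, KVM_MEMORY_ENCRYPT_REG_REGION, &region);
 *
 * Registering a region pins its pages for the lifetime of the guest, so that
 * memory holding ciphertext is never swapped out or moved under the guest's
 * key.
 */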
int svm_register_enc_region(struct kvm *kvm,
			    struct kvm_enc_region *range)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct enc_region *region;
	int ret = 0;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
		return -EINVAL;

	region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
	if (!region)
		return -ENOMEM;

	mutex_lock(&kvm->lock);
	region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
	if (IS_ERR(region->pages)) {
		ret = PTR_ERR(region->pages);
		mutex_unlock(&kvm->lock);
		goto e_free;
	}

	region->uaddr = range->addr;
	region->size = range->size;

	list_add_tail(&region->list, &sev->regions_list);
	mutex_unlock(&kvm->lock);

	/*
	 * The guest may change the memory encryption attribute from C=0 -> C=1
	 * or vice versa for this memory range. Let's make sure caches are
	 * flushed so that guest data gets written into memory with the correct
	 * C-bit.
	 */
	sev_clflush_pages(region->pages, region->npages);

	return ret;

e_free:
	kfree(region);
	return ret;
}

static struct enc_region *
find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct list_head *head = &sev->regions_list;
	struct enc_region *i;

	list_for_each_entry(i, head, list) {
		if (i->uaddr == range->addr &&
		    i->size == range->size)
			return i;
	}

	return NULL;
}

static void __unregister_enc_region_locked(struct kvm *kvm,
					   struct enc_region *region)
{
	sev_unpin_memory(kvm, region->pages, region->npages);
	list_del(&region->list);
	kfree(region);
}

int svm_unregister_enc_region(struct kvm *kvm,
			      struct kvm_enc_region *range)
{
	struct enc_region *region;
	int ret;

	mutex_lock(&kvm->lock);

	if (!sev_guest(kvm)) {
		ret = -ENOTTY;
		goto failed;
	}

	region = find_enc_region(kvm, range);
	if (!region) {
		ret = -EINVAL;
		goto failed;
	}

	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will
	 * not do this, so issue a WBINVD.
	 */
	wbinvd_on_all_cpus();

	__unregister_enc_region_locked(kvm, region);

	mutex_unlock(&kvm->lock);
	return 0;

failed:
	mutex_unlock(&kvm->lock);
	return ret;
}

void sev_vm_destroy(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct list_head *head = &sev->regions_list;
	struct list_head *pos, *q;

	if (!sev_guest(kvm))
		return;

	mutex_lock(&kvm->lock);

	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will
	 * not do this, so issue a WBINVD.
	 */
	wbinvd_on_all_cpus();

	/*
	 * If userspace was terminated before unregistering the memory regions
	 * then unpin all of the registered memory.
	 */
	if (!list_empty(head)) {
		list_for_each_safe(pos, q, head) {
			__unregister_enc_region_locked(kvm,
				list_entry(pos, struct enc_region, list));
			cond_resched();
		}
	}

	mutex_unlock(&kvm->lock);

	sev_unbind_asid(kvm, sev->handle);
	sev_asid_free(sev->asid);
}

void __init sev_hardware_setup(void)
{
	unsigned int eax, ebx, ecx, edx;
	bool sev_es_supported = false;
	bool sev_supported = false;

	/* Does the CPU support SEV? */
	if (!boot_cpu_has(X86_FEATURE_SEV))
		goto out;

	/* Retrieve SEV CPUID information */
	cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);

	/* Set encryption bit location for SEV-ES guests */
	sev_enc_bit = ebx & 0x3f;

	/* Maximum number of encrypted guests supported simultaneously */
	max_sev_asid = ecx;

	if (!svm_sev_enabled())
		goto out;

	/* Minimum ASID value that should be used for SEV guest */
	min_sev_asid = edx;

	/* Initialize SEV ASID bitmaps */
	sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
	if (!sev_asid_bitmap)
		goto out;

	sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
	if (!sev_reclaim_asid_bitmap)
		goto out;

	pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
	sev_supported = true;

	/* SEV-ES support requested? */
	if (!sev_es)
		goto out;

	/* Does the CPU support SEV-ES? */
	if (!boot_cpu_has(X86_FEATURE_SEV_ES))
		goto out;

	/* Has the system been allocated ASIDs for SEV-ES? */
	if (min_sev_asid == 1)
		goto out;

	pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
	sev_es_supported = true;

out:
	sev = sev_supported;
	sev_es = sev_es_supported;
}

void sev_hardware_teardown(void)
{
	if (!svm_sev_enabled())
		return;

	bitmap_free(sev_asid_bitmap);
	bitmap_free(sev_reclaim_asid_bitmap);

	sev_flush_asids();
}

/*
 * Pages used by hardware to hold guest encrypted state must be flushed before
 * returning them to the system.
 */
static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
				   unsigned long len)
{
	/*
	 * If hardware enforced cache coherency for encrypted mappings of the
	 * same physical page is supported, nothing to do.
	 */
	if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
		return;

	/*
	 * If the VM Page Flush MSR is supported, use it to flush the page
	 * (using the page virtual address and the guest ASID).
	 */
	if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
		struct kvm_sev_info *sev;
		unsigned long va_start;
		u64 start, stop;

		/* Align start and stop to page boundaries. */
		va_start = (unsigned long)va;
		start = (u64)va_start & PAGE_MASK;
		stop = PAGE_ALIGN((u64)va_start + len);

		if (start < stop) {
			sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;

			while (start < stop) {
				wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
				       start | sev->asid);

				start += PAGE_SIZE;
			}

			return;
		}

		WARN(1, "Address overflow, using WBINVD\n");
	}

	/*
	 * Hardware should always have one of the above features,
	 * but if not, use WBINVD and issue a warning.
	 */
	WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
	wbinvd_on_all_cpus();
}

void sev_free_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm;

	if (!sev_es_guest(vcpu->kvm))
		return;

	svm = to_svm(vcpu);

	if (vcpu->arch.guest_state_protected)
		sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
	__free_page(virt_to_page(svm->vmsa));

	if (svm->ghcb_sa_free)
		kfree(svm->ghcb_sa);
}

static void dump_ghcb(struct vcpu_svm *svm)
{
	struct ghcb *ghcb = svm->ghcb;
	unsigned int nbits;

	/* Re-use the dump_invalid_vmcb module parameter */
	if (!dump_invalid_vmcb) {
		pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
		return;
	}

	nbits = sizeof(ghcb->save.valid_bitmap) * 8;

	pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
	       ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
	       ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
	       ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
	       ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
}

static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct ghcb *ghcb = svm->ghcb;

	/*
	 * The GHCB protocol so far allows for the following data
	 * to be returned:
	 *   GPRs RAX, RBX, RCX, RDX
	 *
	 * Copy their values, even if they may not have been written during the
	 * VM-Exit. It's the guest's responsibility to not consume random data.
	 */
	ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
	ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
	ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
	ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
}

static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct ghcb *ghcb = svm->ghcb;
	u64 exit_code;

	/*
	 * The GHCB protocol so far allows for the following data
	 * to be supplied:
	 *   GPRs RAX, RBX, RCX, RDX
	 *   XCR0
	 *   CPL
	 *
	 * VMMCALL allows the guest to provide extra registers. KVM also
	 * expects RSI for hypercalls, so include that, too.
	 *
	 * Copy their values to the appropriate location if supplied.
	 */
	memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));

	vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);

	svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);

	if (ghcb_xcr0_is_valid(ghcb)) {
		vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
		kvm_update_cpuid_runtime(vcpu);
	}

	/* Copy the GHCB exit information into the VMCB fields */
	exit_code = ghcb_get_sw_exit_code(ghcb);
	control->exit_code = lower_32_bits(exit_code);
	control->exit_code_hi = upper_32_bits(exit_code);
	control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
	control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);

	/* Clear the valid entries fields */
	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}

static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu;
	struct ghcb *ghcb;
	u64 exit_code = 0;

	ghcb = svm->ghcb;

	/* Only GHCB Usage code 0 is supported */
	if (ghcb->ghcb_usage)
		goto vmgexit_err;

	/*
	 * Retrieve the exit code now even though it may not be marked valid,
	 * as it could help with debugging.
	 */
	exit_code = ghcb_get_sw_exit_code(ghcb);

	if (!ghcb_sw_exit_code_is_valid(ghcb) ||
	    !ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    !ghcb_sw_exit_info_2_is_valid(ghcb))
		goto vmgexit_err;

	switch (ghcb_get_sw_exit_code(ghcb)) {
	case SVM_EXIT_READ_DR7:
		break;
	case SVM_EXIT_WRITE_DR7:
		if (!ghcb_rax_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_RDTSC:
		break;
	case SVM_EXIT_RDPMC:
		if (!ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_CPUID:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		if (ghcb_get_rax(ghcb) == 0xd)
			if (!ghcb_xcr0_is_valid(ghcb))
				goto vmgexit_err;
		break;
	case SVM_EXIT_INVD:
		break;
	case SVM_EXIT_IOIO:
		if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
			if (!ghcb_sw_scratch_is_valid(ghcb))
				goto vmgexit_err;
		} else {
			if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
				if (!ghcb_rax_is_valid(ghcb))
					goto vmgexit_err;
		}
		break;
	case SVM_EXIT_MSR:
		if (!ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		if (ghcb_get_sw_exit_info_1(ghcb)) {
			if (!ghcb_rax_is_valid(ghcb) ||
			    !ghcb_rdx_is_valid(ghcb))
				goto vmgexit_err;
		}
		break;
	case SVM_EXIT_VMMCALL:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_cpl_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_RDTSCP:
		break;
	case SVM_EXIT_WBINVD:
		break;
	case SVM_EXIT_MONITOR:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_rcx_is_valid(ghcb) ||
		    !ghcb_rdx_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_MWAIT:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_VMGEXIT_MMIO_READ:
	case SVM_VMGEXIT_MMIO_WRITE:
		if (!ghcb_sw_scratch_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_VMGEXIT_NMI_COMPLETE:
	case SVM_VMGEXIT_AP_HLT_LOOP:
	case SVM_VMGEXIT_AP_JUMP_TABLE:
	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
		break;
	default:
		goto vmgexit_err;
	}

	return 0;

vmgexit_err:
	vcpu = &svm->vcpu;

	if (ghcb->ghcb_usage) {
		vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
			    ghcb->ghcb_usage);
	} else {
		vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
			    exit_code);
		dump_ghcb(svm);
	}

	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
	vcpu->run->internal.ndata = 2;
	vcpu->run->internal.data[0] = exit_code;
	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;

	return -EINVAL;
}

static void pre_sev_es_run(struct vcpu_svm *svm)
{
	if (!svm->ghcb)
		return;

	if (svm->ghcb_sa_free) {
		/*
		 * The scratch area lives outside the GHCB, so there is a
		 * buffer that, depending on the operation performed, may
		 * need to be synced, then freed.
		 */
		if (svm->ghcb_sa_sync) {
			kvm_write_guest(svm->vcpu.kvm,
					ghcb_get_sw_scratch(svm->ghcb),
					svm->ghcb_sa, svm->ghcb_sa_len);
			svm->ghcb_sa_sync = false;
		}

		kfree(svm->ghcb_sa);
		svm->ghcb_sa = NULL;
		svm->ghcb_sa_free = false;
	}

	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);

	sev_es_sync_to_ghcb(svm);

	kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
	svm->ghcb = NULL;
}

void pre_sev_run(struct vcpu_svm *svm, int cpu)
{
	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
	int asid = sev_get_asid(svm->vcpu.kvm);

	/* Perform any SEV-ES pre-run actions */
	pre_sev_es_run(svm);

	/* Assign the asid allocated with this SEV guest */
	svm->asid = asid;

	/*
	 * Flush guest TLB:
	 *
	 * 1) when different VMCB for the same ASID is to be run on the same host CPU.
	 * 2) or this VMCB was executed on different host CPU in previous VMRUNs.
	 */
	if (sd->sev_vmcbs[asid] == svm->vmcb &&
	    svm->vcpu.arch.last_vmentry_cpu == cpu)
		return;

	sd->sev_vmcbs[asid] = svm->vmcb;
	svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
	vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
}

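/*
 * A VMGEXIT that transfers data (MMIO, string I/O) names its scratch area via
 * the GHCB sw_scratch field.  setup_vmgexit_scratch() below accepts two
 * layouts: the scratch area may sit inside the GHCB's shared_buffer, in which
 * case it is used in place through the existing GHCB mapping, or it may be a
 * separate guest buffer, in which case it is bounced through a kernel
 * allocation (capped at GHCB_SCRATCH_AREA_LIMIT) and, for reads, written back
 * to guest memory before the next VMRUN (see pre_sev_es_run()).
 */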
#define GHCB_SCRATCH_AREA_LIMIT		(16ULL * PAGE_SIZE)
static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct ghcb *ghcb = svm->ghcb;
	u64 ghcb_scratch_beg, ghcb_scratch_end;
	u64 scratch_gpa_beg, scratch_gpa_end;
	void *scratch_va;

	scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
	if (!scratch_gpa_beg) {
		pr_err("vmgexit: scratch gpa not provided\n");
		return false;
	}

	scratch_gpa_end = scratch_gpa_beg + len;
	if (scratch_gpa_end < scratch_gpa_beg) {
		pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
		       len, scratch_gpa_beg);
		return false;
	}

	if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
		/* Scratch area begins within GHCB */
		ghcb_scratch_beg = control->ghcb_gpa +
				   offsetof(struct ghcb, shared_buffer);
		ghcb_scratch_end = control->ghcb_gpa +
				   offsetof(struct ghcb, reserved_1);

		/*
		 * If the scratch area begins within the GHCB, it must be
		 * completely contained in the GHCB shared buffer area.
		 */
		if (scratch_gpa_beg < ghcb_scratch_beg ||
		    scratch_gpa_end > ghcb_scratch_end) {
			pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
			       scratch_gpa_beg, scratch_gpa_end);
			return false;
		}

		scratch_va = (void *)svm->ghcb;
		scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
	} else {
		/*
		 * The guest memory must be read into a kernel buffer, so
		 * limit the size
		 */
		if (len > GHCB_SCRATCH_AREA_LIMIT) {
			pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
			       len, GHCB_SCRATCH_AREA_LIMIT);
			return false;
		}
		scratch_va = kzalloc(len, GFP_KERNEL);
		if (!scratch_va)
			return false;

		if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
			/* Unable to copy scratch area from guest */
			pr_err("vmgexit: kvm_read_guest for scratch area failed\n");

			kfree(scratch_va);
			return false;
		}

		/*
		 * The scratch area is outside the GHCB. The operation will
		 * dictate whether the buffer needs to be synced before running
		 * the vCPU next time (i.e. a read was requested so the data
		 * must be written back to the guest memory).
		 */
		svm->ghcb_sa_sync = sync;
		svm->ghcb_sa_free = true;
	}

	svm->ghcb_sa = scratch_va;
	svm->ghcb_sa_len = len;

	return true;
}

static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
			      unsigned int pos)
{
	svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
	svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
}

static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
{
	return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
}

static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
{
	svm->vmcb->control.ghcb_gpa = value;
}

static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct kvm_vcpu *vcpu = &svm->vcpu;
	u64 ghcb_info;
	int ret = 1;

	ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;

	trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
					     control->ghcb_gpa);

	switch (ghcb_info) {
	case GHCB_MSR_SEV_INFO_REQ:
		set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
						    GHCB_VERSION_MIN,
						    sev_enc_bit));
		break;
	case GHCB_MSR_CPUID_REQ: {
		u64 cpuid_fn, cpuid_reg, cpuid_value;

		cpuid_fn = get_ghcb_msr_bits(svm,
					     GHCB_MSR_CPUID_FUNC_MASK,
					     GHCB_MSR_CPUID_FUNC_POS);

		/* Initialize the registers needed by the CPUID intercept */
		vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
		vcpu->arch.regs[VCPU_REGS_RCX] = 0;

		ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID);
		if (!ret) {
			ret = -EINVAL;
			break;
		}

		cpuid_reg = get_ghcb_msr_bits(svm,
					      GHCB_MSR_CPUID_REG_MASK,
					      GHCB_MSR_CPUID_REG_POS);
		if (cpuid_reg == 0)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
		else if (cpuid_reg == 1)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
		else if (cpuid_reg == 2)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
		else
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];

		set_ghcb_msr_bits(svm, cpuid_value,
				  GHCB_MSR_CPUID_VALUE_MASK,
				  GHCB_MSR_CPUID_VALUE_POS);

		set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
				  GHCB_MSR_INFO_MASK,
				  GHCB_MSR_INFO_POS);
		break;
	}
	case GHCB_MSR_TERM_REQ: {
		u64 reason_set, reason_code;

		reason_set = get_ghcb_msr_bits(svm,
					       GHCB_MSR_TERM_REASON_SET_MASK,
					       GHCB_MSR_TERM_REASON_SET_POS);
		reason_code = get_ghcb_msr_bits(svm,
						GHCB_MSR_TERM_REASON_MASK,
						GHCB_MSR_TERM_REASON_POS);
		pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
			reason_set, reason_code);
		fallthrough;
	}
	default:
		ret = -EINVAL;
	}

	trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
					    control->ghcb_gpa, ret);

	return ret;
}

int sev_handle_vmgexit(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	u64 ghcb_gpa, exit_code;
	struct ghcb *ghcb;
	int ret;

	/* Validate the GHCB */
	ghcb_gpa = control->ghcb_gpa;
	if (ghcb_gpa & GHCB_MSR_INFO_MASK)
		return sev_handle_vmgexit_msr_protocol(svm);

	if (!ghcb_gpa) {
		vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n");
		return -EINVAL;
	}

	if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
		/* Unable to map GHCB from guest */
		vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
			    ghcb_gpa);
		return -EINVAL;
	}

	svm->ghcb = svm->ghcb_map.hva;
	ghcb = svm->ghcb_map.hva;

	trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);

	exit_code = ghcb_get_sw_exit_code(ghcb);

	ret = sev_es_validate_vmgexit(svm);
	if (ret)
		return ret;

	sev_es_sync_from_ghcb(svm);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	ret = -EINVAL;
	switch (exit_code) {
	case SVM_VMGEXIT_MMIO_READ:
		if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
			break;

		ret = kvm_sev_es_mmio_read(&svm->vcpu,
					   control->exit_info_1,
					   control->exit_info_2,
					   svm->ghcb_sa);
		break;
	case SVM_VMGEXIT_MMIO_WRITE:
		if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
			break;

		ret = kvm_sev_es_mmio_write(&svm->vcpu,
					    control->exit_info_1,
					    control->exit_info_2,
					    svm->ghcb_sa);
		break;
	case SVM_VMGEXIT_NMI_COMPLETE:
		ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
		break;
	case SVM_VMGEXIT_AP_HLT_LOOP:
		ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
		break;
	case SVM_VMGEXIT_AP_JUMP_TABLE: {
		struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;

		switch (control->exit_info_1) {
		case 0:
			/* Set AP jump table address */
			sev->ap_jump_table = control->exit_info_2;
			break;
		case 1:
			/* Get AP jump table address */
			ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
			break;
		default:
			pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
			       control->exit_info_1);
			ghcb_set_sw_exit_info_1(ghcb, 1);
			ghcb_set_sw_exit_info_2(ghcb,
						X86_TRAP_UD |
						SVM_EVTINJ_TYPE_EXEPT |
						SVM_EVTINJ_VALID);
		}

		ret = 1;
		break;
	}
	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
		vcpu_unimpl(&svm->vcpu,
			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
			    control->exit_info_1, control->exit_info_2);
		break;
	default:
		ret = svm_invoke_exit_handler(svm, exit_code);
	}

	return ret;
}

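/*
 * For SEV-ES string I/O (INS/OUTS) the emulator cannot touch guest memory
 * directly since it is encrypted, so the data is exchanged through the
 * VMGEXIT scratch buffer instead.
 */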
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
{
	if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
		return -EINVAL;

	return kvm_sev_es_string_io(&svm->vcpu, size, port,
				    svm->ghcb_sa, svm->ghcb_sa_len, in);
}

void sev_es_init_vmcb(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;

	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;

	/*
	 * An SEV-ES guest requires a VMSA area that is separate from the
	 * VMCB page. Do not include the encryption mask on the VMSA physical
	 * address since hardware will access it using the guest key.
	 */
	svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);

	/* Can't intercept CR register access, HV can't modify CR registers */
	svm_clr_intercept(svm, INTERCEPT_CR0_READ);
	svm_clr_intercept(svm, INTERCEPT_CR4_READ);
	svm_clr_intercept(svm, INTERCEPT_CR8_READ);
	svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
	svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
	svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);

	svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);

	/* Track EFER/CR register changes */
	svm_set_intercept(svm, TRAP_EFER_WRITE);
	svm_set_intercept(svm, TRAP_CR0_WRITE);
	svm_set_intercept(svm, TRAP_CR4_WRITE);
	svm_set_intercept(svm, TRAP_CR8_WRITE);

	/* No support for enable_vmware_backdoor */
	clr_exception_intercept(svm, GP_VECTOR);

	/* Can't intercept XSETBV, HV can't modify XCR0 directly */
	svm_clr_intercept(svm, INTERCEPT_XSETBV);

	/* Clear intercepts on selected MSRs */
	set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}

void sev_es_create_vcpu(struct vcpu_svm *svm)
{
	/*
	 * Set the GHCB MSR value as per the GHCB specification when creating
	 * a vCPU for an SEV-ES guest.
	 */
	set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
					    GHCB_VERSION_MIN,
					    sev_enc_bit));
}

void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu)
{
	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
	struct vmcb_save_area *hostsa;
	unsigned int i;

	/*
	 * When running an SEV-ES guest, hardware restores the host state on
	 * VMEXIT, one step of which is a VMLOAD. Since hardware does not
	 * perform a VMSAVE on VMRUN, the host save area must be updated.
	 */
	asm volatile(__ex("vmsave %0") : : "a" (__sme_page_pa(sd->save_area)) : "memory");

	/*
	 * Certain MSRs are restored on VMEXIT, only save ones that aren't
	 * restored.
	 */
	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
		if (host_save_user_msrs[i].sev_es_restored)
			continue;

		rdmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
	}

	/* XCR0 is restored on VMEXIT, save the current host value */
	hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
	hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

	/* PKRU is restored on VMEXIT, save the current host value */
	hostsa->pkru = read_pkru();

	/* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
	hostsa->xss = host_xss;
}

void sev_es_vcpu_put(struct vcpu_svm *svm)
{
	unsigned int i;

	/*
	 * Certain MSRs are restored on VMEXIT and were saved with vmsave in
	 * sev_es_vcpu_load() above. Only restore ones that weren't.
	 */
	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
		if (host_save_user_msrs[i].sev_es_restored)
			continue;

		wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
	}
}

void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* First SIPI: Use the values as initially set by the VMM */
	if (!svm->received_first_sipi) {
		svm->received_first_sipi = true;
		return;
	}

	/*
	 * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
	 * non-zero value.
	 */
	ghcb_set_sw_exit_info_2(svm->ghcb, 1);
}