1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Hosting Protected Virtual Machines 4 * 5 * Copyright IBM Corp. 2019, 2020 6 * Author(s): Janosch Frank <frankja@linux.ibm.com> 7 */ 8 9 #include <linux/export.h> 10 #include <linux/kvm.h> 11 #include <linux/kvm_host.h> 12 #include <linux/minmax.h> 13 #include <linux/pagemap.h> 14 #include <linux/sched/signal.h> 15 #include <asm/uv.h> 16 #include <asm/mman.h> 17 #include <linux/pagewalk.h> 18 #include <linux/sched/mm.h> 19 #include <linux/mmu_notifier.h> 20 #include <asm/gmap_helpers.h> 21 #include "kvm-s390.h" 22 #include "dat.h" 23 #include "gaccess.h" 24 #include "gmap.h" 25 #include "faultin.h" 26 27 bool kvm_s390_pv_is_protected(struct kvm *kvm) 28 { 29 lockdep_assert_held(&kvm->lock); 30 return !!kvm_s390_pv_get_handle(kvm); 31 } 32 EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected); 33 34 bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu) 35 { 36 lockdep_assert_held(&vcpu->mutex); 37 return !!kvm_s390_pv_cpu_get_handle(vcpu); 38 } 39 EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected); 40 41 /** 42 * should_export_before_import() - Determine whether an export is needed 43 * before an import-like operation. 44 * @uvcb: The Ultravisor control block of the UVC to be performed. 45 * @mm: The mm of the process. 46 * 47 * Returns whether an export is needed before every import-like operation. 48 * This is needed for shared pages, which don't trigger a secure storage 49 * exception when accessed from a different guest. 50 * 51 * Although considered as one, the Unpin Page UVC is not an actual import, 52 * so it is not affected. 53 * 54 * No export is needed also when there is only one protected VM, because the 55 * page cannot belong to the wrong VM in that case (there is no "other VM" 56 * it can belong to). 57 * 58 * Return: %true if an export is needed before every import, otherwise %false. 
*/
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
	/*
	 * The misc feature indicates, among other things, that importing a
	 * shared page from a different protected VM will automatically also
	 * transfer its ownership.
	 */
	if (uv_has_feature(BIT_UV_FEAT_MISC))
		return false;
	/* Unpin Page Shared is not a real import, so it never needs an export */
	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
		return false;
	/* With at most one protected VM in this mm there is no "other" owner */
	return atomic_read(&mm->context.protected_count) > 1;
}

/*
 * Context passed through guest_fault.priv from kvm_s390_pv_make_secure()
 * to its fault-in callback _kvm_s390_pv_make_secure().
 */
struct pv_make_secure {
	/* UVCB handed to __make_folio_secure() */
	void *uvcb;
	/* set by the callback, with an extra reference, on -E2BIG or -EBUSY */
	struct folio *folio;
	/* the guest; used by the callback to reach kvm->mm and to translate the gfn */
	struct kvm *kvm;
	/* result of the callback; -EAGAIN unless a later step overwrites it */
	int rc;
	/* result of should_export_before_import() for this operation */
	bool needs_export;
};

/*
 * Perform the make-secure operation on @folio using the UVCB stored in the
 * fault's private data. Called by _kvm_s390_pv_make_secure() after it has
 * successfully trylocked the folio.
 *
 * Return: -EFAULT for hugetlb folios, -E2BIG for other large folios,
 * otherwise the return value of __make_folio_secure().
 */
static int __kvm_s390_pv_make_secure(struct guest_fault *f, struct folio *folio)
{
	struct pv_make_secure *priv = f->priv;
	int rc;

	/* The export is attempted before the size checks; its result is ignored */
	if (priv->needs_export)
		uv_convert_from_secure(folio_to_phys(folio));

	if (folio_test_hugetlb(folio))
		return -EFAULT;
	if (folio_test_large(folio))
		return -E2BIG;

	/*
	 * When the fault carries no page, take a temporary folio reference
	 * around the call.
	 * NOTE(review): presumably f->page being set means the fault-in path
	 * already holds a reference - confirm against the faultin code.
	 */
	if (!f->page)
		folio_get(folio);
	rc = __make_folio_secure(folio, priv->uvcb);
	if (!f->page)
		folio_put(folio);

	return rc;
}

/*
 * Fault-in callback for kvm_s390_pv_make_secure().
 *
 * Only trylock variants are used for the mmap lock and the folio lock; if
 * either cannot be taken, priv->rc is left at -EAGAIN. When the operation
 * reports -E2BIG or -EBUSY, the folio is stored in priv->folio with an
 * extra reference, which the caller has to drop.
 */
static void _kvm_s390_pv_make_secure(struct guest_fault *f)
{
	struct pv_make_secure *priv = f->priv;
	struct folio *folio;
	spinlock_t *ptl; /* pte lock from try_get_locked_pte() */
	pte_t *ptep;

	folio = pfn_folio(f->pfn);
	priv->rc = -EAGAIN;

	if (!mmap_read_trylock(priv->kvm->mm))
		return;

	ptep = try_get_locked_pte(priv->kvm->mm, gfn_to_hva(priv->kvm, f->gfn), &ptl);
	/* An error-encoded pointer aborts the operation with that error code */
	if (IS_ERR_VALUE(ptep)) {
		priv->rc = PTR_ERR(ptep);
		goto out;
	}

	if (folio_trylock(folio)) {
		priv->rc = __kvm_s390_pv_make_secure(f, folio);
		if (priv->rc == -E2BIG || priv->rc == -EBUSY) {
			priv->folio = folio;
			folio_get(folio);
		}
		folio_unlock(folio);
	}

	/* ptep may be NULL here; only a non-NULL pte has to be unmapped and unlocked */
	if (ptep)
		pte_unmap_unlock(ptep, ptl);
out:
	mmap_read_unlock(priv->kvm->mm);
}

/**
 * kvm_s390_pv_make_secure() - make one guest page secure
 * @kvm:
the guest 141 * @gaddr: the guest address that needs to be made secure 142 * @uvcb: the UVCB specifying which operation needs to be performed 143 * 144 * Context: needs to be called with kvm->srcu held. 145 * Return: 0 on success, < 0 in case of error. 146 */ 147 int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb) 148 { 149 struct pv_make_secure priv = { .uvcb = uvcb, .kvm = kvm, }; 150 struct guest_fault f = { 151 .write_attempt = true, 152 .gfn = gpa_to_gfn(gaddr), 153 .callback = _kvm_s390_pv_make_secure, 154 .priv = &priv, 155 }; 156 int rc; 157 158 lockdep_assert_held(&kvm->srcu); 159 160 priv.needs_export = should_export_before_import(uvcb, kvm->mm); 161 162 scoped_guard(mutex, &kvm->arch.pv.import_lock) { 163 rc = kvm_s390_faultin_gfn(NULL, kvm, &f); 164 165 if (!rc) { 166 rc = priv.rc; 167 if (priv.folio) { 168 rc = s390_wiggle_split_folio(kvm->mm, priv.folio); 169 if (!rc) 170 rc = -EAGAIN; 171 } 172 } 173 } 174 if (priv.folio) 175 folio_put(priv.folio); 176 return rc; 177 } 178 179 int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr) 180 { 181 struct uv_cb_cts uvcb = { 182 .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, 183 .header.len = sizeof(uvcb), 184 .guest_handle = kvm_s390_pv_get_handle(kvm), 185 .gaddr = gaddr, 186 }; 187 188 return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb); 189 } 190 191 /** 192 * kvm_s390_pv_destroy_page() - Destroy a guest page. 193 * @kvm: the guest 194 * @gaddr: the guest address to destroy 195 * 196 * An attempt will be made to destroy the given guest page. If the attempt 197 * fails, an attempt is made to export the page. If both attempts fail, an 198 * appropriate error is returned. 199 * 200 * Context: may sleep. 
201 */ 202 int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr) 203 { 204 struct page *page; 205 int rc = 0; 206 207 mmap_read_lock(kvm->mm); 208 page = gfn_to_page(kvm, gpa_to_gfn(gaddr)); 209 if (page) 210 rc = __kvm_s390_pv_destroy_page(page); 211 kvm_release_page_clean(page); 212 mmap_read_unlock(kvm->mm); 213 return rc; 214 } 215 216 /** 217 * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to 218 * be destroyed 219 * 220 * @list: list head for the list of leftover VMs 221 * @old_gmap_table: the gmap table of the leftover protected VM 222 * @handle: the handle of the leftover protected VM 223 * @stor_var: pointer to the variable storage of the leftover protected VM 224 * @stor_base: address of the base storage of the leftover protected VM 225 * 226 * Represents a protected VM that is still registered with the Ultravisor, 227 * but which does not correspond any longer to an active KVM VM. It should 228 * be destroyed at some point later, either asynchronously or when the 229 * process terminates. 230 */ 231 struct pv_vm_to_be_destroyed { 232 struct list_head list; 233 unsigned long old_gmap_table; 234 u64 handle; 235 void *stor_var; 236 unsigned long stor_base; 237 }; 238 239 static void kvm_s390_clear_pv_state(struct kvm *kvm) 240 { 241 kvm->arch.pv.handle = 0; 242 kvm->arch.pv.guest_len = 0; 243 kvm->arch.pv.stor_base = 0; 244 kvm->arch.pv.stor_var = NULL; 245 } 246 247 int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) 248 { 249 int cc; 250 251 if (!kvm_s390_pv_cpu_get_handle(vcpu)) 252 return 0; 253 254 cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc); 255 256 KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x", 257 vcpu->vcpu_id, *rc, *rrc); 258 WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc); 259 260 /* Intended memory leak for something that should never happen. 
*/ 261 if (!cc) 262 free_pages(vcpu->arch.pv.stor_base, 263 get_order(uv_info.guest_cpu_stor_len)); 264 265 free_page((unsigned long)sida_addr(vcpu->arch.sie_block)); 266 vcpu->arch.sie_block->pv_handle_cpu = 0; 267 vcpu->arch.sie_block->pv_handle_config = 0; 268 memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv)); 269 vcpu->arch.sie_block->sdf = 0; 270 /* 271 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0). 272 * Use the reset value of gbea to avoid leaking the kernel pointer of 273 * the just freed sida. 274 */ 275 vcpu->arch.sie_block->gbea = 1; 276 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 277 278 return cc ? EIO : 0; 279 } 280 281 int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) 282 { 283 struct uv_cb_csc uvcb = { 284 .header.cmd = UVC_CMD_CREATE_SEC_CPU, 285 .header.len = sizeof(uvcb), 286 }; 287 void *sida_addr; 288 int cc; 289 290 if (kvm_s390_pv_cpu_get_handle(vcpu)) 291 return -EINVAL; 292 293 vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, 294 get_order(uv_info.guest_cpu_stor_len)); 295 if (!vcpu->arch.pv.stor_base) 296 return -ENOMEM; 297 298 /* Input */ 299 uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm); 300 uvcb.num = vcpu->arch.sie_block->icpua; 301 uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block); 302 uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base); 303 304 /* Alloc Secure Instruction Data Area Designation */ 305 sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); 306 if (!sida_addr) { 307 free_pages(vcpu->arch.pv.stor_base, 308 get_order(uv_info.guest_cpu_stor_len)); 309 return -ENOMEM; 310 } 311 vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr); 312 313 cc = uv_call(0, (u64)&uvcb); 314 *rc = uvcb.header.rc; 315 *rrc = uvcb.header.rrc; 316 KVM_UV_EVENT(vcpu->kvm, 3, 317 "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x", 318 vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc, 319 uvcb.header.rrc); 320 321 if (cc) { 322 u16 dummy; 323 324 
kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy); 325 return -EIO; 326 } 327 328 /* Output */ 329 vcpu->arch.pv.handle = uvcb.cpu_handle; 330 vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle; 331 vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm); 332 vcpu->arch.sie_block->sdf = 2; 333 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); 334 return 0; 335 } 336 337 /* only free resources when the destroy was successful */ 338 static void kvm_s390_pv_dealloc_vm(struct kvm *kvm) 339 { 340 vfree(kvm->arch.pv.stor_var); 341 free_pages(kvm->arch.pv.stor_base, 342 get_order(uv_info.guest_base_stor_len)); 343 kvm_s390_clear_pv_state(kvm); 344 } 345 346 static int kvm_s390_pv_alloc_vm(struct kvm *kvm) 347 { 348 unsigned long base = uv_info.guest_base_stor_len; 349 unsigned long virt = uv_info.guest_virt_var_stor_len; 350 unsigned long npages = 0, vlen = 0; 351 352 kvm->arch.pv.stor_var = NULL; 353 kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base)); 354 if (!kvm->arch.pv.stor_base) 355 return -ENOMEM; 356 357 /* 358 * Calculate current guest storage for allocation of the 359 * variable storage, which is based on the length in MB. 360 * 361 * Slots are sorted by GFN 362 */ 363 mutex_lock(&kvm->slots_lock); 364 npages = kvm_s390_get_gfn_end(kvm_memslots(kvm)); 365 mutex_unlock(&kvm->slots_lock); 366 367 kvm->arch.pv.guest_len = npages * PAGE_SIZE; 368 369 /* Allocate variable storage */ 370 vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE); 371 vlen += uv_info.guest_virt_base_stor_len; 372 kvm->arch.pv.stor_var = vzalloc(vlen); 373 if (!kvm->arch.pv.stor_var) 374 goto out_err; 375 return 0; 376 377 out_err: 378 kvm_s390_pv_dealloc_vm(kvm); 379 return -ENOMEM; 380 } 381 382 /** 383 * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM. 
384 * @kvm: the KVM that was associated with this leftover protected VM 385 * @leftover: details about the leftover protected VM that needs a clean up 386 * @rc: the RC code of the Destroy Secure Configuration UVC 387 * @rrc: the RRC code of the Destroy Secure Configuration UVC 388 * 389 * Destroy one leftover protected VM. 390 * On success, kvm->mm->context.protected_count will be decremented atomically 391 * and all other resources used by the VM will be freed. 392 * 393 * Return: 0 in case of success, otherwise 1 394 */ 395 static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm, 396 struct pv_vm_to_be_destroyed *leftover, 397 u16 *rc, u16 *rrc) 398 { 399 int cc; 400 401 /* It used the destroy-fast UVC, nothing left to do here */ 402 if (!leftover->handle) 403 goto done_fast; 404 cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc); 405 KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc); 406 WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc); 407 if (cc) 408 return cc; 409 /* 410 * Intentionally leak unusable memory. If the UVC fails, the memory 411 * used for the VM and its metadata is permanently unusable. 412 * This can only happen in case of a serious KVM or hardware bug; it 413 * is not expected to happen in normal operation. 
414 */ 415 free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len)); 416 free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER); 417 vfree(leftover->stor_var); 418 done_fast: 419 atomic_dec(&kvm->mm->context.protected_count); 420 return 0; 421 } 422 423 static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc) 424 { 425 struct uv_cb_destroy_fast uvcb = { 426 .header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST, 427 .header.len = sizeof(uvcb), 428 .handle = kvm_s390_pv_get_handle(kvm), 429 }; 430 int cc; 431 432 cc = uv_call_sched(0, (u64)&uvcb); 433 if (rc) 434 *rc = uvcb.header.rc; 435 if (rrc) 436 *rrc = uvcb.header.rrc; 437 KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x", 438 uvcb.header.rc, uvcb.header.rrc); 439 WARN_ONCE(cc && uvcb.header.rc != 0x104, 440 "protvirt destroy vm fast failed handle %llx rc %x rrc %x", 441 kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc); 442 /* Intended memory leak on "impossible" error */ 443 if (!cc) 444 kvm_s390_pv_dealloc_vm(kvm); 445 return cc ? -EIO : 0; 446 } 447 448 static inline bool is_destroy_fast_available(void) 449 { 450 return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list); 451 } 452 453 /** 454 * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown. 455 * @kvm: the VM 456 * @rc: return value for the RC field of the UVCB 457 * @rrc: return value for the RRC field of the UVCB 458 * 459 * Set aside the protected VM for a subsequent teardown. The VM will be able 460 * to continue immediately as a non-secure VM, and the information needed to 461 * properly tear down the protected VM is set aside. If another protected VM 462 * was already set aside without starting its teardown, this function will 463 * fail. 464 * The CPUs of the protected VM need to be destroyed beforehand. 
465 * 466 * Context: kvm->lock needs to be held 467 * 468 * Return: 0 in case of success, -EINVAL if another protected VM was already set 469 * aside, -ENOMEM if the system ran out of memory. 470 */ 471 int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc) 472 { 473 struct pv_vm_to_be_destroyed *priv; 474 int res = 0; 475 476 lockdep_assert_held(&kvm->lock); 477 /* 478 * If another protected VM was already prepared for teardown, refuse. 479 * A normal deinitialization has to be performed instead. 480 */ 481 if (kvm->arch.pv.set_aside) 482 return -EINVAL; 483 484 /* Guest with segment type ASCE, refuse to destroy asynchronously */ 485 if (kvm->arch.gmap->asce.dt == TABLE_TYPE_SEGMENT) 486 return -EINVAL; 487 488 priv = kzalloc_obj(*priv); 489 if (!priv) 490 return -ENOMEM; 491 492 if (is_destroy_fast_available()) { 493 res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc); 494 } else { 495 priv->stor_var = kvm->arch.pv.stor_var; 496 priv->stor_base = kvm->arch.pv.stor_base; 497 priv->handle = kvm_s390_pv_get_handle(kvm); 498 priv->old_gmap_table = (unsigned long)dereference_asce(kvm->arch.gmap->asce); 499 if (s390_replace_asce(kvm->arch.gmap)) 500 res = -ENOMEM; 501 } 502 503 if (res) { 504 kfree(priv); 505 return res; 506 } 507 508 gmap_pv_destroy_range(kvm->arch.gmap, 0, gpa_to_gfn(SZ_2G), false); 509 kvm_s390_clear_pv_state(kvm); 510 kvm->arch.pv.set_aside = priv; 511 512 *rc = UVC_RC_EXECUTED; 513 *rrc = 42; 514 return 0; 515 } 516 517 /** 518 * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM 519 * @kvm: the KVM whose protected VM needs to be deinitialized 520 * @rc: the RC code of the UVC 521 * @rrc: the RRC code of the UVC 522 * 523 * Deinitialize the current protected VM. This function will destroy and 524 * cleanup the current protected VM, but it will not cleanup the guest 525 * memory. 
* This function should only be called when the protected VM has
 * just been created and therefore does not have any guest memory, or when
 * the caller cleans up the guest memory separately.
 *
 * This function should not fail, but if it does, the donated memory must
 * not be freed.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	int cc;

	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	if (!cc) {
		/* The VM is gone: drop its count and release the donated storage */
		atomic_dec(&kvm->mm->context.protected_count);
		kvm_s390_pv_dealloc_vm(kvm);
	} else {
		/* Intended memory leak on "impossible" error */
		s390_replace_asce(kvm->arch.gmap);
	}
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);

	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
 * with a specific KVM.
 * @kvm: the KVM to be cleaned up
 * @rc: the RC code of the first failing UVC
 * @rrc: the RRC code of the first failing UVC
 *
 * This function will clean up all protected VMs associated with a KVM.
 * This includes the active one, the one prepared for deinitialization with
 * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
 *
 * Context: kvm->lock needs to be held unless being called from
 * kvm_arch_destroy_vm.
 *
 * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
 */
int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *cur;
	bool need_zap = false;
	u16 _rc, _rrc;
	int cc = 0;

	/*
	 * Nothing to do if the counter was already 0. Otherwise make sure
	 * the counter does not reach 0 before calling gmap_pv_destroy_range.
	 */
	if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
		return 0;

	/* Start from "executed"; compared against UVC_RC_EXECUTED further down */
	*rc = 1;
	/* If the current VM is protected, destroy it */
	if (kvm_s390_pv_get_handle(kvm)) {
		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
		need_zap = true;
	}

	/* If a previous protected VM was set aside, put it in the need_cleanup list */
	if (kvm->arch.pv.set_aside) {
		/*
		 * The set-aside pointer is passed directly as the list node.
		 * NOTE(review): this relies on @list being the first member of
		 * struct pv_vm_to_be_destroyed - confirm the field type of
		 * set_aside.
		 */
		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
		kvm->arch.pv.set_aside = NULL;
	}

	/* Cleanup all protected VMs in the need_cleanup list */
	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
		need_zap = true;
		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
			cc = 1;
			/*
			 * Only return the first error rc and rrc, so make
			 * sure it is not overwritten. All destroys will
			 * additionally be reported via KVM_UV_EVENT().
			 */
			if (*rc == UVC_RC_EXECUTED) {
				*rc = _rc;
				*rrc = _rrc;
			}
		}
		/* The entry is removed and freed even if disposing it failed */
		list_del(&cur->list);
		kfree(cur);
	}

	/*
	 * If the mm still has a mapping, try to mark all its pages as
	 * accessible. The counter should not reach zero before this
	 * cleanup has been performed.
	 */
	if (need_zap && mmget_not_zero(kvm->mm)) {
		gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), false);
		mmput(kvm->mm);
	}

	/* Now the counter can safely reach 0 */
	atomic_dec(&kvm->mm->context.protected_count);
	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
 * @kvm: the VM previously associated with the protected VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Tear down the protected VM that had been previously prepared for teardown
 * using kvm_s390_pv_set_aside().
* Ideally this should be called by
 * userspace asynchronously from a separate thread.
 *
 * Context: kvm->lock must not be held.
 *
 * Return: 0 in case of success, -EINVAL if no protected VM had been
 * prepared for asynchronous teardown, -EIO in case of other errors.
 */
int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *p;
	int ret = 0;

	lockdep_assert_not_held(&kvm->lock);
	/* Detach the set-aside VM under the lock so nobody else can pick it up */
	mutex_lock(&kvm->lock);
	p = kvm->arch.pv.set_aside;
	kvm->arch.pv.set_aside = NULL;
	mutex_unlock(&kvm->lock);
	if (!p)
		return -EINVAL;

	/* When a fatal signal is received, stop immediately */
	if (gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), true))
		goto done;
	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
		ret = -EIO;
	kfree(p);
	p = NULL;
done:
	/*
	 * p is not NULL if we aborted because of a fatal signal, in which
	 * case queue the leftover for later cleanup.
	 */
	if (p) {
		mutex_lock(&kvm->lock);
		list_add(&p->list, &kvm->arch.pv.need_cleanup);
		mutex_unlock(&kvm->lock);
		/* Did not finish, but pretend things went well */
		*rc = UVC_RC_EXECUTED;
		*rrc = 42;
	}
	return ret;
}

/*
 * mmu notifier release callback: convert the vCPUs back from protected
 * mode and, when that worked and the fast destroy UVC is available, destroy
 * the protected VM with it. The gmap is flagged for export-on-unmap in
 * every case.
 */
static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
					     struct mm_struct *mm)
{
	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
	u16 dummy;
	int r;

	/*
	 * No locking is needed since this is the last thread of the last user of this
	 * struct mm.
	 * When the struct kvm gets deinitialized, this notifier is also
	 * unregistered. This means that if this notifier runs, then the
	 * struct kvm is still valid.
	 */
	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
	set_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &kvm->arch.gmap->flags);
}

/* Only the release callback is provided. */
static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
	.release = kvm_s390_pv_mmu_notifier_release,
};

/*
 * Create the protected VM for @kvm: register the mmu notifier (once),
 * allocate the donated storage and issue the Create Secure Configuration
 * UVC. @rc and @rrc receive the UVCB return codes once the UVC was issued.
 *
 * Return: 0 on success, the error from mmu_notifier_register() or
 * kvm_s390_pv_alloc_vm() if one of them failed, -EIO if the UVC failed.
 */
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_cgc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
		.header.len = sizeof(uvcb)
	};
	int cc, ret;
	u16 dummy;

	/* Add the notifier only once. No races because we hold kvm->lock */
	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
		/* The notifier will be unregistered when the VM is destroyed */
		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
		ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
		if (ret) {
			/* Reset ops so that a later call tries to register again */
			kvm->arch.pv.mmu_notifier.ops = NULL;
			return ret;
		}
	}

	ret = kvm_s390_pv_alloc_vm(kvm);
	if (ret)
		return ret;

	/* Inputs */
	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
	uvcb.guest_asce = kvm->arch.gmap->asce.val;
	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
	uvcb.conf_base_stor_origin =
		virt_to_phys((void *)kvm->arch.pv.stor_base);
	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;

	/* Disallow 1M huge pages in the gmap and split the existing ones */
	clear_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &kvm->arch.gmap->flags);
	gmap_split_huge_pages(kvm->arch.gmap);

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);

	/* Outputs */
	kvm->arch.pv.handle = uvcb.guest_handle;

	/*
	 * The count is raised before the error check because
	 * kvm_s390_pv_deinit_vm() decrements it again when its destroy UVC
	 * succeeds.
	 */
	atomic_inc(&kvm->mm->context.protected_count);
	if (cc) {
		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
		} else {
			atomic_dec(&kvm->mm->context.protected_count);
			kvm_s390_pv_dealloc_vm(kvm);
		}
		return -EIO;
	}
	return 0;
}

/*
 * Issue the Set Secure Configuration Parameters UVC with the header at @hdr
 * of @length bytes for the protected VM of @kvm.
 *
 * Return: 0 on success, -EINVAL if the UVC failed.
 */
int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
			      u16 *rrc)
{
	struct uv_cb_ssc uvcb = {
		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
		.header.len = sizeof(uvcb),
		.sec_header_origin = (u64)hdr,
		.sec_header_len = length,
		.guest_handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc = uv_call(0, (u64)&uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
		     *rc, *rrc);
	return cc ? -EINVAL : 0;
}

/*
 * Unpack a single guest page at @addr with the Unpack Image UVC, using
 * @tweak and @offset as the two tweak values.
 *
 * Return: 0 on success, -EAGAIN if the caller should retry, otherwise a
 * negative error code.
 */
static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
		      u64 offset, u16 *rc, u16 *rrc)
{
	struct uv_cb_unp uvcb = {
		.header.cmd = UVC_CMD_UNPACK_IMG,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = addr,
		.tweak[0] = tweak,
		.tweak[1] = offset,
	};
	int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

	/* On -ENXIO, fault the page in for writing and let the caller retry */
	if (ret == -ENXIO) {
		ret = kvm_s390_faultin_gfn_simple(NULL, kvm, gpa_to_gfn(addr), true);
		if (!ret)
			return -EAGAIN;
	}

	if (ret && ret != -EAGAIN)
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
			     uvcb.gaddr, *rc, *rrc);
	return ret;
}

/*
 * Unpack @size bytes of guest memory starting at @addr, one page at a time.
 * Both @addr and @size must be page aligned and @size must not be 0. The
 * loop runs with kvm->srcu held and retries a page for as long as
 * unpack_one() returns -EAGAIN, unless a fatal signal is pending.
 *
 * Return: 0 on success, -EINVAL for invalid arguments, otherwise the error
 * returned by unpack_one().
 */
int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
		       unsigned long tweak, u16 *rc, u16 *rrc)
{
	u64 offset = 0;
	int ret = 0;

	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
		return -EINVAL;

	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
		     addr, size);

	guard(srcu)(&kvm->srcu);

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (ret == -EAGAIN) {
			cond_resched();
			/* Leaves the loop with ret still set to -EAGAIN */
			if (fatal_signal_pending(current))
				break;
			continue;
		}
		if (ret)
			break;
		addr += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	if (!ret)
		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
	return ret;
}

/*
 * Set the state of the protected CPU of @vcpu to @state with the CPU Set
 * State UVC.
 *
 * Return: 0 on success, -EINVAL if the UVC failed.
 */
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
	struct uv_cb_cpu_set_state uvcb = {
		.header.cmd	= UVC_CMD_CPU_SET_STATE,
		.header.len	= sizeof(uvcb),
		.cpu_handle	= kvm_s390_pv_cpu_get_handle(vcpu),
		.state		= state,
	};
	int cc;

	cc = uv_call(0, (u64)&uvcb);
	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
	if (cc)
		return -EINVAL;
	return 0;
}

/*
 * Issue the Dump CPU UVC for @vcpu with @buff as the dump area.
 *
 * Return: the value returned by uv_call_sched(), unmodified (not
 * translated into an errno).
 */
int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_cpu uvcb = {
		.header.cmd = UVC_CMD_DUMP_CPU,
		.header.len = sizeof(uvcb),
		.cpu_handle = vcpu->arch.pv.handle,
		.dump_area_origin = (u64)buff,
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	return cc;
}

/* Size of the cache for the storage state dump data. 1MB for now */
#define DUMP_BUFF_LEN HPAGE_SIZE

/**
 * kvm_s390_pv_dump_stor_state
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *	   is requested.
* @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_user_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). The gaddr pointer will be updated with the last address
 * for which data was written when returning to userspace. buff_user
 * might be written to even if an error rc is returned. For instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the cache fails
 *  -EINVAL if gaddr is not aligned to 1MB
 *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_stor_state uvcb = {
		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
		.header.len = sizeof(uvcb),
		.config_handle = kvm->arch.pv.handle,
		.gaddr = *gaddr,
		.dump_area_origin = 0,
	};
	const u64 increment_len = uv_info.conf_dump_storage_state_len;
	size_t buff_kvm_size;
	size_t size_done = 0;
	u8 *buff_kvm = NULL;
	int cc, ret;

	ret = -EINVAL;
	/* UV call processes 1MB guest storage chunks at a time */
	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
		goto out;

	/*
	 * We provide the storage state for 1MB chunks of guest
	 * storage. The buffer will need to be aligned to
	 * conf_dump_storage_state_len so we don't end on a partial
	 * chunk.
	 */
	if (!buff_user_len ||
	    !IS_ALIGNED(buff_user_len, increment_len))
		goto out;

	/*
	 * Allocate a buffer from which we will later copy to the user
	 * process. We don't want userspace to dictate our buffer size
	 * so we limit it to DUMP_BUFF_LEN.
	 */
	ret = -ENOMEM;
	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
	buff_kvm = vzalloc(buff_kvm_size);
	if (!buff_kvm)
		goto out;

	ret = 0;
	uvcb.dump_area_origin = (u64)buff_kvm;
	/* We will loop until the user buffer is filled or an error occurs */
	do {
		/* Get 1MB worth of guest storage state data */
		cc = uv_call_sched(0, (u64)&uvcb);

		/* All or nothing */
		if (cc) {
			ret = -EINVAL;
			break;
		}

		/* One more chunk sits in the kernel buffer; move on to the next 1MB block */
		size_done += increment_len;
		uvcb.dump_area_origin += increment_len;
		buff_user_len -= increment_len;
		uvcb.gaddr += HPAGE_SIZE;

		/* KVM Buffer full, time to copy to the process */
		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
			if (copy_to_user(buff_user, buff_kvm, size_done)) {
				ret = -EFAULT;
				break;
			}

			/* Start refilling the kernel buffer from its beginning */
			buff_user += size_done;
			size_done = 0;
			uvcb.dump_area_origin = (u64)buff_kvm;
		}
	} while (buff_user_len);

	/* Report back where we ended dumping */
	*gaddr = uvcb.gaddr;

	/* Lets only log errors, we don't want to spam */
out:
	if (ret)
		KVM_UV_EVENT(kvm, 3,
			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	/* buff_kvm is still NULL when one of the early checks failed */
	vfree(buff_kvm);

	return ret;
}

/**
 * kvm_s390_pv_dump_complete
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Completes the dumping operation and writes the
completion data to 1010 * user space. 1011 * 1012 * Context: kvm->lock needs to be held 1013 * 1014 * Return: 1015 * 0 on success 1016 * -ENOMEM if allocating the completion buffer fails 1017 * -EINVAL if the UV call fails, rc and rrc will be set in this case 1018 * -EFAULT if copying the result to buff_user failed 1019 */ 1020 int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user, 1021 u16 *rc, u16 *rrc) 1022 { 1023 struct uv_cb_dump_complete complete = { 1024 .header.len = sizeof(complete), 1025 .header.cmd = UVC_CMD_DUMP_COMPLETE, 1026 .config_handle = kvm_s390_pv_get_handle(kvm), 1027 }; 1028 u64 *compl_data; 1029 int ret; 1030 1031 /* Allocate dump area */ 1032 compl_data = vzalloc(uv_info.conf_dump_finalize_len); 1033 if (!compl_data) 1034 return -ENOMEM; 1035 complete.dump_area_origin = (u64)compl_data; 1036 1037 ret = uv_call_sched(0, (u64)&complete); 1038 *rc = complete.header.rc; 1039 *rrc = complete.header.rrc; 1040 KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x", 1041 complete.header.rc, complete.header.rrc); 1042 1043 if (!ret) { 1044 /* 1045 * kvm_s390_pv_dealloc_vm() will also (mem)set 1046 * this to false on a reboot or other destroy 1047 * operation for this vm. 1048 */ 1049 kvm->arch.pv.dumping = false; 1050 kvm_s390_vcpu_unblock_all(kvm); 1051 ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len); 1052 if (ret) 1053 ret = -EFAULT; 1054 } 1055 vfree(compl_data); 1056 /* If the UVC returned an error, translate it to -EINVAL */ 1057 if (ret > 0) 1058 ret = -EINVAL; 1059 return ret; 1060 } 1061