// SPDX-License-Identifier: GPL-2.0
/*
 * Hosting Protected Virtual Machines
 *
 * Copyright IBM Corp. 2019, 2020
 *    Author(s): Janosch Frank <frankja@linux.ibm.com>
 */

#include <linux/export.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/uv.h>
#include <asm/mman.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"
#include "dat.h"
#include "gaccess.h"
#include "gmap.h"
#include "faultin.h"

bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
	lockdep_assert_held(&kvm->lock);
	return !!kvm_s390_pv_get_handle(kvm);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);

bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
	lockdep_assert_held(&vcpu->mutex);
	return !!kvm_s390_pv_cpu_get_handle(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);

/**
 * should_export_before_import() - Determine whether an export is needed
 * before an import-like operation.
 * @uvcb: The Ultravisor control block of the UVC to be performed.
 * @mm: The mm of the process.
 *
 * Returns whether an export is needed before every import-like operation.
 * The export is needed for shared pages, because they don't trigger a
 * secure storage exception when accessed from a different guest.
 *
 * Although it is considered one, the Unpin Page UVC is not an actual
 * import, so it is not affected.
 *
 * No export is needed either when there is only one protected VM, because
 * in that case the page cannot belong to the wrong VM (there is no "other
 * VM" it could belong to).
 *
 * Return: %true if an export is needed before every import, otherwise %false.
 */
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
	/*
	 * The misc feature indicates, among other things, that importing a
	 * shared page from a different protected VM will automatically also
	 * transfer its ownership.
	 */
	if (uv_has_feature(BIT_UV_FEAT_MISC))
		return false;
	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
		return false;
	return atomic_read(&mm->context.protected_count) > 1;
}

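/**
 * struct pv_make_secure - context for the make-secure import callbacks
 * @uvcb: the Ultravisor control block of the UVC to be performed
 * @folio: the folio that caused -E2BIG or -EBUSY, with an extra reference
 *         held so that it can be split later
 * @rc: the result of the import callback
 * @needs_export: whether an export is needed before the import
 */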
struct pv_make_secure {
	void *uvcb;
	struct folio *folio;
	int rc;
	bool needs_export;
};

static int __kvm_s390_pv_make_secure(struct guest_fault *f, struct folio *folio)
{
	struct pv_make_secure *priv = f->priv;
	int rc;

	if (priv->needs_export)
		uv_convert_from_secure(folio_to_phys(folio));

	if (folio_test_hugetlb(folio))
		return -EFAULT;
	if (folio_test_large(folio))
		return -E2BIG;

	if (!f->page)
		folio_get(folio);
	rc = __make_folio_secure(folio, priv->uvcb);
	if (!f->page)
		folio_put(folio);

	return rc;
}

static void _kvm_s390_pv_make_secure(struct guest_fault *f)
{
	struct pv_make_secure *priv = f->priv;
	struct folio *folio;

	folio = pfn_folio(f->pfn);
	priv->rc = -EAGAIN;
	if (folio_trylock(folio)) {
		priv->rc = __kvm_s390_pv_make_secure(f, folio);
		if (priv->rc == -E2BIG || priv->rc == -EBUSY) {
			priv->folio = folio;
			folio_get(folio);
		}
		folio_unlock(folio);
	}
}

/**
 * kvm_s390_pv_make_secure() - make one guest page secure
 * @kvm: the guest
 * @gaddr: the guest address that needs to be made secure
 * @uvcb: the UVCB specifying which operation needs to be performed
 *
 * Context: needs to be called with kvm->srcu held.
 * Return: 0 on success, < 0 in case of error.
 */
int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
{
	struct pv_make_secure priv = { .uvcb = uvcb };
	struct guest_fault f = {
		.write_attempt = true,
		.gfn = gpa_to_gfn(gaddr),
		.callback = _kvm_s390_pv_make_secure,
		.priv = &priv,
	};
	int rc;

	lockdep_assert_held(&kvm->srcu);

	priv.needs_export = should_export_before_import(uvcb, kvm->mm);

	scoped_guard(mutex, &kvm->arch.pv.import_lock) {
		rc = kvm_s390_faultin_gfn(NULL, kvm, &f);

		if (!rc) {
			rc = priv.rc;
			if (priv.folio) {
				rc = s390_wiggle_split_folio(kvm->mm, priv.folio);
				if (!rc)
					rc = -EAGAIN;
			}
		}
	}
	if (priv.folio)
		folio_put(priv.folio);
	return rc;
}

int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
{
	struct uv_cb_cts uvcb = {
		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = gaddr,
	};

	return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
}

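/*
 * Callers of kvm_s390_pv_convert_to_secure() must hold kvm->srcu and are
 * expected to retry when -EAGAIN is returned; see kvm_s390_pv_unpack()
 * below for the in-tree pattern. A minimal sketch of a hypothetical
 * caller (illustrative only, not compiled):
 *
 *	do {
 *		rc = kvm_s390_pv_convert_to_secure(kvm, gaddr);
 *		if (rc == -EAGAIN) {
 *			cond_resched();
 *			if (fatal_signal_pending(current))
 *				break;
 *		}
 *	} while (rc == -EAGAIN);
 */
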
/**
 * kvm_s390_pv_destroy_page() - Destroy a guest page.
 * @kvm: the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 *
 * Context: may sleep.
 */
int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
{
	struct page *page;
	int rc = 0;

	mmap_read_lock(kvm->mm);
	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
	if (page)
		rc = __kvm_s390_pv_destroy_page(page);
	kvm_release_page_clean(page);
	mmap_read_unlock(kvm->mm);
	return rc;
}

/**
 * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
 * be destroyed
 *
 * @list: list head for the list of leftover VMs
 * @old_gmap_table: the gmap table of the leftover protected VM
 * @handle: the handle of the leftover protected VM
 * @stor_var: pointer to the variable storage of the leftover protected VM
 * @stor_base: address of the base storage of the leftover protected VM
 *
 * Represents a protected VM that is still registered with the Ultravisor,
 * but which does not correspond any longer to an active KVM VM. It should
 * be destroyed at some point later, either asynchronously or when the
 * process terminates.
 */
struct pv_vm_to_be_destroyed {
	struct list_head list;
	unsigned long old_gmap_table;
	u64 handle;
	void *stor_var;
	unsigned long stor_base;
};

static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
	kvm->arch.pv.handle = 0;
	kvm->arch.pv.guest_len = 0;
	kvm->arch.pv.stor_base = 0;
	kvm->arch.pv.stor_var = NULL;
}

int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	int cc = 0;

	if (kvm_s390_pv_cpu_get_handle(vcpu)) {
		cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
				   UVC_CMD_DESTROY_SEC_CPU, rc, rrc);

		KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
			     vcpu->vcpu_id, *rc, *rrc);
		WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);
	}

	/* Intended memory leak for something that should never happen. */
	if (!cc)
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));

	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
	vcpu->arch.sie_block->pv_handle_cpu = 0;
	vcpu->arch.sie_block->pv_handle_config = 0;
	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
	vcpu->arch.sie_block->sdf = 0;
	/*
	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
	 * Use the reset value of gbea to avoid leaking the kernel pointer of
	 * the just freed sida.
	 */
	vcpu->arch.sie_block->gbea = 1;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

	return cc ? -EIO : 0;
}

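/**
 * kvm_s390_pv_create_cpu - Create a protected CPU for a vCPU
 * @vcpu: the vCPU for which a protected CPU is to be created
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Donates the base storage for the protected CPU and a page for the SIDA
 * (Secure Instruction Data Area), then performs the Create Secure CPU UVC.
 * On failure, the donated memory is freed again via
 * kvm_s390_pv_destroy_cpu().
 *
 * Return: 0 in case of success, -EINVAL if the vCPU is already protected,
 * -ENOMEM if an allocation fails, -EIO if the UVC fails.
 */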
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	struct uv_cb_csc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
		.header.len = sizeof(uvcb),
	};
	void *sida_addr;
	int cc;

	if (kvm_s390_pv_cpu_get_handle(vcpu))
		return -EINVAL;

	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
						   get_order(uv_info.guest_cpu_stor_len));
	if (!vcpu->arch.pv.stor_base)
		return -ENOMEM;

	/* Input */
	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
	uvcb.num = vcpu->arch.sie_block->icpua;
	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);

	/* Alloc Secure Instruction Data Area Designation */
	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!sida_addr) {
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));
		return -ENOMEM;
	}
	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);

	cc = uv_call(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(vcpu->kvm, 3,
		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
		     uvcb.header.rrc);

	if (cc) {
		u16 dummy;

		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
		return -EIO;
	}

	/* Output */
	vcpu->arch.pv.handle = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
	vcpu->arch.sie_block->sdf = 2;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	return 0;
}

/* only free resources when the destroy was successful */
static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
{
	vfree(kvm->arch.pv.stor_var);
	free_pages(kvm->arch.pv.stor_base,
		   get_order(uv_info.guest_base_stor_len));
	kvm_s390_clear_pv_state(kvm);
}

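/*
 * Illustrative sizing for the function below (assuming 4 KiB PAGE_SIZE
 * and 1 MiB HPAGE_SIZE as on s390): for a guest whose memslots end at
 * 4 GiB, npages is 1M, so the variable storage is
 * guest_virt_var_stor_len * 4096 (one increment per MiB of guest
 * storage), rounded up to a full page, plus guest_virt_base_stor_len.
 */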
static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
{
	unsigned long base = uv_info.guest_base_stor_len;
	unsigned long virt = uv_info.guest_virt_var_stor_len;
	unsigned long npages = 0, vlen = 0;

	kvm->arch.pv.stor_var = NULL;
	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
	if (!kvm->arch.pv.stor_base)
		return -ENOMEM;

	/*
	 * Calculate current guest storage for allocation of the
	 * variable storage, which is based on the length in MB.
	 *
	 * Slots are sorted by GFN.
	 */
	mutex_lock(&kvm->slots_lock);
	npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
	mutex_unlock(&kvm->slots_lock);

	kvm->arch.pv.guest_len = npages * PAGE_SIZE;

	/* Allocate variable storage */
	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
	vlen += uv_info.guest_virt_base_stor_len;
	kvm->arch.pv.stor_var = vzalloc(vlen);
	if (!kvm->arch.pv.stor_var)
		goto out_err;
	return 0;

out_err:
	kvm_s390_pv_dealloc_vm(kvm);
	return -ENOMEM;
}

/**
 * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
 * @kvm: the KVM that was associated with this leftover protected VM
 * @leftover: details about the leftover protected VM that needs a cleanup
 * @rc: the RC code of the Destroy Secure Configuration UVC
 * @rrc: the RRC code of the Destroy Secure Configuration UVC
 *
 * Destroy one leftover protected VM.
 * On success, kvm->mm->context.protected_count will be decremented atomically
 * and all other resources used by the VM will be freed.
 *
 * Return: 0 in case of success, otherwise 1
 */
static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
					    struct pv_vm_to_be_destroyed *leftover,
					    u16 *rc, u16 *rrc)
{
	int cc;

	/* It used the destroy-fast UVC, nothing left to do here */
	if (!leftover->handle)
		goto done_fast;
	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
	if (cc)
		return cc;
	/*
	 * Intentionally leak unusable memory. If the UVC fails, the memory
	 * used for the VM and its metadata is permanently unusable.
	 * This can only happen in case of a serious KVM or hardware bug; it
	 * is not expected to happen in normal operation.
	 */
	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
	vfree(leftover->stor_var);
done_fast:
	atomic_dec(&kvm->mm->context.protected_count);
	return 0;
}

static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_destroy_fast uvcb = {
		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
		.header.len = sizeof(uvcb),
		.handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	if (rc)
		*rc = uvcb.header.rc;
	if (rrc)
		*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
		     uvcb.header.rc, uvcb.header.rrc);
	WARN_ONCE(cc && uvcb.header.rc != 0x104,
		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
	/* Intended memory leak on "impossible" error */
	if (!cc)
		kvm_s390_pv_dealloc_vm(kvm);
	return cc ? -EIO : 0;
}

static inline bool is_destroy_fast_available(void)
{
	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
}

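/*
 * A protected configuration can be torn down in three ways: synchronously
 * with kvm_s390_pv_deinit_vm(), in one shot with the Destroy Secure
 * Configuration Fast UVC if is_destroy_fast_available(), or lazily by
 * setting the VM aside with kvm_s390_pv_set_aside() and disposing of the
 * leftover from a separate thread later.
 */
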
/**
 * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
 * @kvm: the VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Set aside the protected VM for a subsequent teardown. The VM will be able
 * to continue immediately as a non-secure VM, and the information needed to
 * properly tear down the protected VM is set aside. If another protected VM
 * was already set aside without starting its teardown, this function will
 * fail.
 * The CPUs of the protected VM need to be destroyed beforehand.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, -EINVAL if another protected VM was already set
 * aside, -ENOMEM if the system ran out of memory.
 */
int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *priv;
	int res = 0;

	lockdep_assert_held(&kvm->lock);
	/*
	 * If another protected VM was already prepared for teardown, refuse.
	 * A normal deinitialization has to be performed instead.
	 */
	if (kvm->arch.pv.set_aside)
		return -EINVAL;

	/* Guest with segment type ASCE, refuse to destroy asynchronously */
	if (kvm->arch.gmap->asce.dt == TABLE_TYPE_SEGMENT)
		return -EINVAL;

	priv = kzalloc_obj(*priv);
	if (!priv)
		return -ENOMEM;

	if (is_destroy_fast_available()) {
		res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
	} else {
		priv->stor_var = kvm->arch.pv.stor_var;
		priv->stor_base = kvm->arch.pv.stor_base;
		priv->handle = kvm_s390_pv_get_handle(kvm);
		priv->old_gmap_table = (unsigned long)dereference_asce(kvm->arch.gmap->asce);
		if (s390_replace_asce(kvm->arch.gmap))
			res = -ENOMEM;
	}

	if (res) {
		kfree(priv);
		return res;
	}

	gmap_pv_destroy_range(kvm->arch.gmap, 0, gpa_to_gfn(SZ_2G), false);
	kvm_s390_clear_pv_state(kvm);
	kvm->arch.pv.set_aside = priv;

	*rc = UVC_RC_EXECUTED;
	*rrc = 42;
	return 0;
}

/**
 * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
 * @kvm: the KVM whose protected VM needs to be deinitialized
 * @rc: the RC code of the UVC
 * @rrc: the RRC code of the UVC
 *
 * Deinitialize the current protected VM. This function will destroy and
 * clean up the current protected VM, but it will not clean up the guest
 * memory. This function should only be called when the protected VM has
 * just been created and therefore does not have any guest memory, or when
 * the caller cleans up the guest memory separately.
 *
 * This function should not fail, but if it does, the donated memory must
 * not be freed.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	int cc;

	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	if (!cc) {
		atomic_dec(&kvm->mm->context.protected_count);
		kvm_s390_pv_dealloc_vm(kvm);
	} else {
		/* Intended memory leak on "impossible" error */
		s390_replace_asce(kvm->arch.gmap);
	}
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);

	return cc ? -EIO : 0;
}

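/*
 * Note on kvm->mm->context.protected_count: it counts the protected
 * configurations, active or leftover, that still reference this mm. It is
 * incremented in kvm_s390_pv_init_vm() and decremented only once a
 * configuration has actually been destroyed. The cleanup code below makes
 * sure that it cannot reach zero while guest pages may still need to be
 * made accessible again.
 */
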
/**
 * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
 * with a specific KVM.
 * @kvm: the KVM to be cleaned up
 * @rc: the RC code of the first failing UVC
 * @rrc: the RRC code of the first failing UVC
 *
 * This function will clean up all protected VMs associated with a KVM.
 * This includes the active one, the one prepared for deinitialization with
 * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
 *
 * Context: kvm->lock needs to be held unless being called from
 * kvm_arch_destroy_vm.
 *
 * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
 */
int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *cur;
	bool need_zap = false;
	u16 _rc, _rrc;
	int cc = 0;

	/*
	 * Nothing to do if the counter was already 0. Otherwise make sure
	 * the counter does not reach 0 before calling gmap_pv_destroy_range().
	 */
	if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
		return 0;

	*rc = 1;
	/* If the current VM is protected, destroy it */
	if (kvm_s390_pv_get_handle(kvm)) {
		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
		need_zap = true;
	}

	/* If a previous protected VM was set aside, put it in the need_cleanup list */
	if (kvm->arch.pv.set_aside) {
		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
		kvm->arch.pv.set_aside = NULL;
	}

	/* Cleanup all protected VMs in the need_cleanup list */
	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
		need_zap = true;
		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
			cc = 1;
			/*
			 * Only return the first error rc and rrc, so make
			 * sure it is not overwritten. All destroys will
			 * additionally be reported via KVM_UV_EVENT().
			 */
			if (*rc == UVC_RC_EXECUTED) {
				*rc = _rc;
				*rrc = _rrc;
			}
		}
		list_del(&cur->list);
		kfree(cur);
	}

	/*
	 * If the mm still has a mapping, try to mark all its pages as
	 * accessible. The counter should not reach zero before this
	 * cleanup has been performed.
	 */
	if (need_zap && mmget_not_zero(kvm->mm)) {
		gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), false);
		mmput(kvm->mm);
	}

	/* Now the counter can safely reach 0 */
	atomic_dec(&kvm->mm->context.protected_count);
	return cc ? -EIO : 0;
}

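/*
 * Sketch of the intended userspace flow for the asynchronous teardown
 * (hypothetical snippet; the subcommand names are assumed to be the
 * KVM_PV_ASYNC_CLEANUP_* ones from the uapi):
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ASYNC_CLEANUP_PREPARE };
 *
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);  // kvm_s390_pv_set_aside()
 *	// reboot the now non-secure guest, then, from a separate thread:
 *	cmd.cmd = KVM_PV_ASYNC_CLEANUP_PERFORM;
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);  // kvm_s390_pv_deinit_aside_vm()
 */
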
/**
 * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
 * @kvm: the VM previously associated with the protected VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Tear down the protected VM that had been previously prepared for teardown
 * using kvm_s390_pv_set_aside. Ideally this should be called by
 * userspace asynchronously from a separate thread.
 *
 * Context: kvm->lock must not be held.
 *
 * Return: 0 in case of success, -EINVAL if no protected VM had been
 * prepared for asynchronous teardown, -EIO in case of other errors.
 */
int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *p;
	int ret = 0;

	lockdep_assert_not_held(&kvm->lock);
	mutex_lock(&kvm->lock);
	p = kvm->arch.pv.set_aside;
	kvm->arch.pv.set_aside = NULL;
	mutex_unlock(&kvm->lock);
	if (!p)
		return -EINVAL;

	/* When a fatal signal is received, stop immediately */
	if (gmap_pv_destroy_range(kvm->arch.gmap, 0, asce_end(kvm->arch.gmap->asce), true))
		goto done;
	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
		ret = -EIO;
	kfree(p);
	p = NULL;
done:
	/*
	 * p is not NULL if we aborted because of a fatal signal, in which
	 * case queue the leftover for later cleanup.
	 */
	if (p) {
		mutex_lock(&kvm->lock);
		list_add(&p->list, &kvm->arch.pv.need_cleanup);
		mutex_unlock(&kvm->lock);
		/* Did not finish, but pretend things went well */
		*rc = UVC_RC_EXECUTED;
		*rrc = 42;
	}
	return ret;
}

static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
					     struct mm_struct *mm)
{
	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
	u16 dummy;
	int r;

	/*
	 * No locking is needed since this is the last thread of the last user of this
	 * struct mm.
	 * When the struct kvm gets deinitialized, this notifier is also
	 * unregistered. This means that if this notifier runs, then the
	 * struct kvm is still valid.
	 */
	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
	set_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &kvm->arch.gmap->flags);
}

static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
	.release = kvm_s390_pv_mmu_notifier_release,
};

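/**
 * kvm_s390_pv_init_vm - Create a protected configuration for this KVM
 * @kvm: the KVM to be made protected
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Registers the release notifier, donates the base and variable storage,
 * and performs the Create Secure Configuration UVC. On failure, the
 * donated memory is freed again, via kvm_s390_pv_deinit_vm() if the
 * Ultravisor reports that the half-created configuration needs to be
 * destroyed first.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, -EIO if the UVC fails, another negative
 * error code if the allocation or the notifier registration fails.
 */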
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_cgc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
		.header.len = sizeof(uvcb)
	};
	int cc, ret;
	u16 dummy;

	/* Add the notifier only once. No races because we hold kvm->lock */
	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
		/* The notifier will be unregistered when the VM is destroyed */
		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
		ret = mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
		if (ret) {
			kvm->arch.pv.mmu_notifier.ops = NULL;
			return ret;
		}
	}

	ret = kvm_s390_pv_alloc_vm(kvm);
	if (ret)
		return ret;

	/* Inputs */
	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
	uvcb.guest_asce = kvm->arch.gmap->asce.val;
	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
	uvcb.conf_base_stor_origin =
		virt_to_phys((void *)kvm->arch.pv.stor_base);
	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;

	clear_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &kvm->arch.gmap->flags);
	gmap_split_huge_pages(kvm->arch.gmap);

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);

	/* Outputs */
	kvm->arch.pv.handle = uvcb.guest_handle;

	atomic_inc(&kvm->mm->context.protected_count);
	if (cc) {
		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
		} else {
			atomic_dec(&kvm->mm->context.protected_count);
			kvm_s390_pv_dealloc_vm(kvm);
		}
		return -EIO;
	}
	return 0;
}

int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
			      u16 *rrc)
{
	struct uv_cb_ssc uvcb = {
		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
		.header.len = sizeof(uvcb),
		.sec_header_origin = (u64)hdr,
		.sec_header_len = length,
		.guest_handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc = uv_call(0, (u64)&uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
		     *rc, *rrc);
	return cc ? -EINVAL : 0;
}

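/**
 * unpack_one - Unpack one page of the encrypted guest image
 * @kvm: the guest
 * @addr: the guest address of the page to unpack
 * @tweak: the first tweak component, the same for the whole image
 * @offset: the offset of the page within the image, used as the second
 *          tweak component
 * @rc: the RC code of the Unpack UVC
 * @rrc: the RRC code of the Unpack UVC
 *
 * Return: 0 on success, -EAGAIN if the caller is expected to retry,
 * another negative error code otherwise.
 */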
static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
		      u64 offset, u16 *rc, u16 *rrc)
{
	struct uv_cb_unp uvcb = {
		.header.cmd = UVC_CMD_UNPACK_IMG,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = addr,
		.tweak[0] = tweak,
		.tweak[1] = offset,
	};
	int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

	if (ret == -ENXIO) {
		ret = kvm_s390_faultin_gfn_simple(NULL, kvm, gpa_to_gfn(addr), true);
		if (!ret)
			return -EAGAIN;
	}

	if (ret && ret != -EAGAIN)
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
			     uvcb.gaddr, *rc, *rrc);
	return ret;
}

int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
		       unsigned long tweak, u16 *rc, u16 *rrc)
{
	u64 offset = 0;
	int ret = 0;

	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
		return -EINVAL;

	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
		     addr, size);

	guard(srcu)(&kvm->srcu);

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (ret == -EAGAIN) {
			cond_resched();
			if (fatal_signal_pending(current))
				break;
			continue;
		}
		if (ret)
			break;
		addr += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	if (!ret)
		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
	return ret;
}

int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
	struct uv_cb_cpu_set_state uvcb = {
		.header.cmd = UVC_CMD_CPU_SET_STATE,
		.header.len = sizeof(uvcb),
		.cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
		.state = state,
	};
	int cc;

	cc = uv_call(0, (u64)&uvcb);
	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
	if (cc)
		return -EINVAL;
	return 0;
}

int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_cpu uvcb = {
		.header.cmd = UVC_CMD_DUMP_CPU,
		.header.len = sizeof(uvcb),
		.cpu_handle = vcpu->arch.pv.handle,
		.dump_area_origin = (u64)buff,
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	return cc;
}

/* Size of the cache for the storage state dump data. 1MB for now */
#define DUMP_BUFF_LEN HPAGE_SIZE

/**
 * kvm_s390_pv_dump_stor_state - Dump guest storage state to a userspace buffer
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *         is requested.
 * @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_user_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). The gaddr pointer will be updated with the last address
 * for which data was written when returning to userspace. buff_user
 * might be written to even if an error rc is returned, for instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the cache fails
 *  -EINVAL if gaddr is not aligned to 1MB
 *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_stor_state uvcb = {
		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
		.header.len = sizeof(uvcb),
		.config_handle = kvm->arch.pv.handle,
		.gaddr = *gaddr,
		.dump_area_origin = 0,
	};
	const u64 increment_len = uv_info.conf_dump_storage_state_len;
	size_t buff_kvm_size;
	size_t size_done = 0;
	u8 *buff_kvm = NULL;
	int cc, ret;

	ret = -EINVAL;
	/* UV call processes 1MB guest storage chunks at a time */
	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
		goto out;

	/*
	 * We provide the storage state for 1MB chunks of guest
	 * storage. The buffer will need to be aligned to
	 * conf_dump_storage_state_len so we don't end on a partial
	 * chunk.
	 */
	if (!buff_user_len ||
	    !IS_ALIGNED(buff_user_len, increment_len))
		goto out;

	/*
	 * Allocate a buffer from which we will later copy to the user
	 * process. We don't want userspace to dictate our buffer size
	 * so we limit it to DUMP_BUFF_LEN.
	 */
	ret = -ENOMEM;
	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
	buff_kvm = vzalloc(buff_kvm_size);
	if (!buff_kvm)
		goto out;

	ret = 0;
	uvcb.dump_area_origin = (u64)buff_kvm;
	/* We will loop until the user buffer is filled or an error occurs */
	do {
		/* Get 1MB worth of guest storage state data */
		cc = uv_call_sched(0, (u64)&uvcb);

		/* All or nothing */
		if (cc) {
			ret = -EINVAL;
			break;
		}

		size_done += increment_len;
		uvcb.dump_area_origin += increment_len;
		buff_user_len -= increment_len;
		uvcb.gaddr += HPAGE_SIZE;

		/* KVM Buffer full, time to copy to the process */
		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
			if (copy_to_user(buff_user, buff_kvm, size_done)) {
				ret = -EFAULT;
				break;
			}

			buff_user += size_done;
			size_done = 0;
			uvcb.dump_area_origin = (u64)buff_kvm;
		}
	} while (buff_user_len);

	/* Report back where we ended dumping */
	*gaddr = uvcb.gaddr;

	/* Let's only log errors, we don't want to spam */
out:
	if (ret)
		KVM_UV_EVENT(kvm, 3,
			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	vfree(buff_kvm);

	return ret;
}

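/*
 * A configuration dump is driven by userspace through the KVM_PV_DUMP
 * family of PV commands (assumed uapi names): the storage state is
 * fetched in buff_user_len sized pieces with kvm_s390_pv_dump_stor_state()
 * above, each vCPU is dumped with kvm_s390_pv_dump_cpu(), and the
 * completion data is finally retrieved with kvm_s390_pv_dump_complete()
 * below.
 */
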
/**
 * kvm_s390_pv_dump_complete - Complete the dump operation
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Completes the dumping operation and writes the completion data to
 * user space.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the completion buffer fails
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
			      u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_complete complete = {
		.header.len = sizeof(complete),
		.header.cmd = UVC_CMD_DUMP_COMPLETE,
		.config_handle = kvm_s390_pv_get_handle(kvm),
	};
	u64 *compl_data;
	int ret;

	/* Allocate dump area */
	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
	if (!compl_data)
		return -ENOMEM;
	complete.dump_area_origin = (u64)compl_data;

	ret = uv_call_sched(0, (u64)&complete);
	*rc = complete.header.rc;
	*rrc = complete.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
		     complete.header.rc, complete.header.rrc);

	if (!ret) {
		/*
		 * kvm_s390_pv_dealloc_vm() will also (mem)set
		 * this to false on a reboot or other destroy
		 * operation for this vm.
		 */
		kvm->arch.pv.dumping = false;
		kvm_s390_vcpu_unblock_all(kvm);
		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
		if (ret)
			ret = -EFAULT;
	}
	vfree(compl_data);
	/* If the UVC returned an error, translate it to -EINVAL */
	if (ret > 0)
		ret = -EINVAL;
	return ret;
}