// SPDX-License-Identifier: GPL-2.0
/*
 * Hosting Protected Virtual Machines
 *
 * Copyright IBM Corp. 2019, 2020
 *    Author(s): Janosch Frank <frankja@linux.ibm.com>
 */

#include <linux/export.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/gmap.h>
#include <asm/uv.h>
#include <asm/mman.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"

bool kvm_s390_pv_is_protected(struct kvm *kvm)
{
	lockdep_assert_held(&kvm->lock);
	return !!kvm_s390_pv_get_handle(kvm);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);

bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
{
	lockdep_assert_held(&vcpu->mutex);
	return !!kvm_s390_pv_cpu_get_handle(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);

/**
 * kvm_s390_pv_make_secure() - make one guest page secure
 * @kvm: the guest
 * @gaddr: the guest address that needs to be made secure
 * @uvcb: the UVCB specifying which operation needs to be performed
 *
 * Context: needs to be called with kvm->srcu held.
 * Return: 0 on success, < 0 in case of error.
 */
int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
{
	unsigned long vmaddr;

	lockdep_assert_held(&kvm->srcu);

	vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
	if (kvm_is_error_hva(vmaddr))
		return -EFAULT;
	return make_hva_secure(kvm->mm, vmaddr, uvcb);
}

int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
{
	struct uv_cb_cts uvcb = {
		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = gaddr,
	};

	return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
}

/**
 * kvm_s390_pv_destroy_page() - Destroy a guest page.
 * @kvm: the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 *
 * Context: may sleep.
 */
int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
{
	struct page *page;
	int rc = 0;

	mmap_read_lock(kvm->mm);
	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
	if (page)
		rc = __kvm_s390_pv_destroy_page(page);
	kvm_release_page_clean(page);
	mmap_read_unlock(kvm->mm);
	return rc;
}

/**
 * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
 * be destroyed
 *
 * @list: list head for the list of leftover VMs
 * @old_gmap_table: the gmap table of the leftover protected VM
 * @handle: the handle of the leftover protected VM
 * @stor_var: pointer to the variable storage of the leftover protected VM
 * @stor_base: address of the base storage of the leftover protected VM
 *
 * Represents a protected VM that is still registered with the Ultravisor,
 * but which does not correspond any longer to an active KVM VM. It should
 * be destroyed at some point later, either asynchronously or when the
 * process terminates.
 */
struct pv_vm_to_be_destroyed {
	struct list_head list;
	unsigned long old_gmap_table;
	u64 handle;
	void *stor_var;
	unsigned long stor_base;
};

static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
	kvm->arch.pv.handle = 0;
	kvm->arch.pv.guest_len = 0;
	kvm->arch.pv.stor_base = 0;
	kvm->arch.pv.stor_var = NULL;
}

int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	int cc;

	if (!kvm_s390_pv_cpu_get_handle(vcpu))
		return 0;

	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);

	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
		     vcpu->vcpu_id, *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);

	/* Intended memory leak for something that should never happen. */
	if (!cc)
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));

	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
	vcpu->arch.sie_block->pv_handle_cpu = 0;
	vcpu->arch.sie_block->pv_handle_config = 0;
	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
	vcpu->arch.sie_block->sdf = 0;
	/*
	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
	 * Use the reset value of gbea to avoid leaking the kernel pointer of
	 * the just freed sida.
	 */
	vcpu->arch.sie_block->gbea = 1;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

	return cc ? EIO : 0;
}

int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
	struct uv_cb_csc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
		.header.len = sizeof(uvcb),
	};
	void *sida_addr;
	int cc;

	if (kvm_s390_pv_cpu_get_handle(vcpu))
		return -EINVAL;

	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
						   get_order(uv_info.guest_cpu_stor_len));
	if (!vcpu->arch.pv.stor_base)
		return -ENOMEM;

	/* Input */
	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
	uvcb.num = vcpu->arch.sie_block->icpua;
	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);

	/* Alloc Secure Instruction Data Area Designation */
	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!sida_addr) {
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));
		return -ENOMEM;
	}
	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);

	cc = uv_call(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(vcpu->kvm, 3,
		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
		     uvcb.header.rrc);

	if (cc) {
		u16 dummy;

		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
		return -EIO;
	}

	/* Output */
	vcpu->arch.pv.handle = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
	vcpu->arch.sie_block->sdf = 2;
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	return 0;
}

/* only free resources when the destroy was successful */
static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
{
	vfree(kvm->arch.pv.stor_var);
	free_pages(kvm->arch.pv.stor_base,
		   get_order(uv_info.guest_base_stor_len));
	kvm_s390_clear_pv_state(kvm);
}

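/*
 * Editorial note (illustrative only): the variable storage size computed in
 * kvm_s390_pv_alloc_vm() below scales with the number of 1 MB (HPAGE_SIZE)
 * blocks of guest storage. Assuming, for example, a 4 GB guest:
 * npages * PAGE_SIZE / HPAGE_SIZE = 4096, so
 * vlen = ALIGN(4096 * uv_info.guest_virt_var_stor_len, PAGE_SIZE) +
 *        uv_info.guest_virt_base_stor_len.
 */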
static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
{
	unsigned long base = uv_info.guest_base_stor_len;
	unsigned long virt = uv_info.guest_virt_var_stor_len;
	unsigned long npages = 0, vlen = 0;

	kvm->arch.pv.stor_var = NULL;
	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
	if (!kvm->arch.pv.stor_base)
		return -ENOMEM;

	/*
	 * Calculate current guest storage for allocation of the
	 * variable storage, which is based on the length in MB.
	 *
	 * Slots are sorted by GFN
	 */
	mutex_lock(&kvm->slots_lock);
	npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
	mutex_unlock(&kvm->slots_lock);

	kvm->arch.pv.guest_len = npages * PAGE_SIZE;

	/* Allocate variable storage */
	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
	vlen += uv_info.guest_virt_base_stor_len;
	kvm->arch.pv.stor_var = vzalloc(vlen);
	if (!kvm->arch.pv.stor_var)
		goto out_err;
	return 0;

out_err:
	kvm_s390_pv_dealloc_vm(kvm);
	return -ENOMEM;
}

/**
 * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
 * @kvm: the KVM that was associated with this leftover protected VM
 * @leftover: details about the leftover protected VM that needs a clean up
 * @rc: the RC code of the Destroy Secure Configuration UVC
 * @rrc: the RRC code of the Destroy Secure Configuration UVC
 *
 * Destroy one leftover protected VM.
 * On success, kvm->mm->context.protected_count will be decremented atomically
 * and all other resources used by the VM will be freed.
 *
 * Return: 0 in case of success, otherwise 1
 */
static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
					    struct pv_vm_to_be_destroyed *leftover,
					    u16 *rc, u16 *rrc)
{
	int cc;

	/* It used the destroy-fast UVC, nothing left to do here */
	if (!leftover->handle)
		goto done_fast;
	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
	if (cc)
		return cc;
	/*
	 * Intentionally leak unusable memory. If the UVC fails, the memory
	 * used for the VM and its metadata is permanently unusable.
	 * This can only happen in case of a serious KVM or hardware bug; it
	 * is not expected to happen in normal operation.
	 */
	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
	vfree(leftover->stor_var);
done_fast:
	atomic_dec(&kvm->mm->context.protected_count);
	return 0;
}

/**
 * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
 * @kvm: the VM whose memory is to be cleared.
 *
 * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
 * The CPUs of the protected VM need to be destroyed beforehand.
 */
static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
{
	const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
	struct kvm_memory_slot *slot;
	unsigned long len;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kvm->srcu);

	/* Take the memslot containing guest absolute address 0 */
	slot = gfn_to_memslot(kvm, 0);
	/* Clear all slots or parts thereof that are below 2GB */
	while (slot && slot->base_gfn < pages_2g) {
		len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
		s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
		/* Take the next memslot */
		slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
	}

	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_destroy_fast uvcb = {
		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
		.header.len = sizeof(uvcb),
		.handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	if (rc)
		*rc = uvcb.header.rc;
	if (rrc)
		*rrc = uvcb.header.rrc;
	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
		     uvcb.header.rc, uvcb.header.rrc);
	WARN_ONCE(cc && uvcb.header.rc != 0x104,
		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
	/* Intended memory leak on "impossible" error */
	if (!cc)
		kvm_s390_pv_dealloc_vm(kvm);
	return cc ? -EIO : 0;
}

static inline bool is_destroy_fast_available(void)
{
	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
}

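/*
 * Illustrative sketch (editorial addition, not part of the UV interface
 * definition): userspace typically drives the set-aside/teardown pair below
 * through the KVM_S390_PV_COMMAND ioctl, roughly as follows; error handling
 * is omitted and the exact flow depends on the VMM.
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ASYNC_CLEANUP_PREPARE };
 *
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);  // -> kvm_s390_pv_set_aside()
 *	// ... keep running the now non-secure VM, then, from another thread:
 *	cmd.cmd = KVM_PV_ASYNC_CLEANUP_PERFORM;
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);  // -> kvm_s390_pv_deinit_aside_vm()
 */
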
/**
 * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
 * @kvm: the VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Set aside the protected VM for a subsequent teardown. The VM will be able
 * to continue immediately as a non-secure VM, and the information needed to
 * properly tear down the protected VM is set aside. If another protected VM
 * was already set aside without starting its teardown, this function will
 * fail.
 * The CPUs of the protected VM need to be destroyed beforehand.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, -EINVAL if another protected VM was already set
 * aside, -ENOMEM if the system ran out of memory.
 */
int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *priv;
	int res = 0;

	lockdep_assert_held(&kvm->lock);
	/*
	 * If another protected VM was already prepared for teardown, refuse.
	 * A normal deinitialization has to be performed instead.
	 */
	if (kvm->arch.pv.set_aside)
		return -EINVAL;

	/* Guest with segment type ASCE, refuse to destroy asynchronously */
	if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
		return -EINVAL;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	if (is_destroy_fast_available()) {
		res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
	} else {
		priv->stor_var = kvm->arch.pv.stor_var;
		priv->stor_base = kvm->arch.pv.stor_base;
		priv->handle = kvm_s390_pv_get_handle(kvm);
		priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
		WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
		if (s390_replace_asce(kvm->arch.gmap))
			res = -ENOMEM;
	}

	if (res) {
		kfree(priv);
		return res;
	}

	kvm_s390_destroy_lower_2g(kvm);
	kvm_s390_clear_pv_state(kvm);
	kvm->arch.pv.set_aside = priv;

	*rc = UVC_RC_EXECUTED;
	*rrc = 42;
	return 0;
}

/**
 * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
 * @kvm: the KVM whose protected VM needs to be deinitialized
 * @rc: the RC code of the UVC
 * @rrc: the RRC code of the UVC
 *
 * Deinitialize the current protected VM. This function will destroy and
 * cleanup the current protected VM, but it will not cleanup the guest
 * memory. This function should only be called when the protected VM has
 * just been created and therefore does not have any guest memory, or when
 * the caller cleans up the guest memory separately.
 *
 * This function should not fail, but if it does, the donated memory must
 * not be freed.
 *
 * Context: kvm->lock needs to be held
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	int cc;

	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
	if (!cc) {
		atomic_dec(&kvm->mm->context.protected_count);
		kvm_s390_pv_dealloc_vm(kvm);
	} else {
		/* Intended memory leak on "impossible" error */
		s390_replace_asce(kvm->arch.gmap);
	}
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);

	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
 * with a specific KVM.
 * @kvm: the KVM to be cleaned up
 * @rc: the RC code of the first failing UVC
 * @rrc: the RRC code of the first failing UVC
 *
 * This function will clean up all protected VMs associated with a KVM.
 * This includes the active one, the one prepared for deinitialization with
 * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
 *
 * Context: kvm->lock needs to be held unless being called from
 * kvm_arch_destroy_vm.
 *
 * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
 */
int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *cur;
	bool need_zap = false;
	u16 _rc, _rrc;
	int cc = 0;

	/*
	 * Nothing to do if the counter was already 0. Otherwise make sure
	 * the counter does not reach 0 before calling s390_uv_destroy_range.
	 */
	if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
		return 0;

	*rc = 1;
	/* If the current VM is protected, destroy it */
	if (kvm_s390_pv_get_handle(kvm)) {
		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
		need_zap = true;
	}

	/* If a previous protected VM was set aside, put it in the need_cleanup list */
	if (kvm->arch.pv.set_aside) {
		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
		kvm->arch.pv.set_aside = NULL;
	}

	/* Cleanup all protected VMs in the need_cleanup list */
	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
		need_zap = true;
		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
			cc = 1;
			/*
			 * Only return the first error rc and rrc, so make
			 * sure it is not overwritten. All destroys will
			 * additionally be reported via KVM_UV_EVENT().
			 */
			if (*rc == UVC_RC_EXECUTED) {
				*rc = _rc;
				*rrc = _rrc;
			}
		}
		list_del(&cur->list);
		kfree(cur);
	}

	/*
	 * If the mm still has a mapping, try to mark all its pages as
	 * accessible. The counter should not reach zero before this
	 * cleanup has been performed.
	 */
	if (need_zap && mmget_not_zero(kvm->mm)) {
		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
		mmput(kvm->mm);
	}

	/* Now the counter can safely reach 0 */
	atomic_dec(&kvm->mm->context.protected_count);
	return cc ? -EIO : 0;
}

/**
 * kvm_s390_pv_deinit_aside_vm - Tear down a previously set aside protected VM.
 * @kvm: the VM previously associated with the protected VM
 * @rc: return value for the RC field of the UVCB
 * @rrc: return value for the RRC field of the UVCB
 *
 * Tear down the protected VM that had been previously prepared for teardown
 * using kvm_s390_pv_set_aside(). Ideally this should be called by
 * userspace asynchronously from a separate thread.
 *
 * Context: kvm->lock must not be held.
 *
 * Return: 0 in case of success, -EINVAL if no protected VM had been
 * prepared for asynchronous teardown, -EIO in case of other errors.
 */
int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct pv_vm_to_be_destroyed *p;
	int ret = 0;

	lockdep_assert_not_held(&kvm->lock);
	mutex_lock(&kvm->lock);
	p = kvm->arch.pv.set_aside;
	kvm->arch.pv.set_aside = NULL;
	mutex_unlock(&kvm->lock);
	if (!p)
		return -EINVAL;

	/* When a fatal signal is received, stop immediately */
	if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
		goto done;
	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
		ret = -EIO;
	kfree(p);
	p = NULL;
done:
	/*
	 * p is not NULL if we aborted because of a fatal signal, in which
	 * case queue the leftover for later cleanup.
	 */
	if (p) {
		mutex_lock(&kvm->lock);
		list_add(&p->list, &kvm->arch.pv.need_cleanup);
		mutex_unlock(&kvm->lock);
		/* Did not finish, but pretend things went well */
		*rc = UVC_RC_EXECUTED;
		*rrc = 42;
	}
	return ret;
}

static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
					     struct mm_struct *mm)
{
	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
	u16 dummy;
	int r;

	/*
	 * No locking is needed since this is the last thread of the last user of this
	 * struct mm.
	 * When the struct kvm gets deinitialized, this notifier is also
	 * unregistered. This means that if this notifier runs, then the
	 * struct kvm is still valid.
	 */
	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
}

static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
	.release = kvm_s390_pv_mmu_notifier_release,
};

int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct uv_cb_cgc uvcb = {
		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
		.header.len = sizeof(uvcb)
	};
	int cc, ret;
	u16 dummy;

	ret = kvm_s390_pv_alloc_vm(kvm);
	if (ret)
		return ret;

	/* Inputs */
	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
	uvcb.guest_asce = kvm->arch.gmap->asce;
	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
	uvcb.conf_base_stor_origin =
		virt_to_phys((void *)kvm->arch.pv.stor_base);
	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);

	/* Outputs */
	kvm->arch.pv.handle = uvcb.guest_handle;

	atomic_inc(&kvm->mm->context.protected_count);
	if (cc) {
		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
		} else {
			atomic_dec(&kvm->mm->context.protected_count);
			kvm_s390_pv_dealloc_vm(kvm);
		}
		return -EIO;
	}
	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
	/* Add the notifier only once. No races because we hold kvm->lock */
	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
		mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
	}
	return 0;
}

int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
			      u16 *rrc)
{
	struct uv_cb_ssc uvcb = {
		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
		.header.len = sizeof(uvcb),
		.sec_header_origin = (u64)hdr,
		.sec_header_len = length,
		.guest_handle = kvm_s390_pv_get_handle(kvm),
	};
	int cc = uv_call(0, (u64)&uvcb);

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
		     *rc, *rrc);
	return cc ? -EINVAL : 0;
}

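/*
 * Illustrative sketch (editorial addition): when starting a protected guest,
 * userspace typically issues KVM_S390_PV_COMMAND in roughly this order; the
 * authoritative sequence is defined by the VMM and the KVM API documentation,
 * not by this file.
 *
 *	KVM_PV_ENABLE         -> kvm_s390_pv_init_vm() + per-vCPU create
 *	KVM_PV_SET_SEC_PARMS  -> kvm_s390_pv_set_sec_parms()
 *	KVM_PV_UNPACK         -> kvm_s390_pv_unpack(), once per image component
 *	KVM_PV_VERIFY         -> Verify UVC, after which the guest may run
 */
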
static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
		      u64 offset, u16 *rc, u16 *rrc)
{
	struct uv_cb_unp uvcb = {
		.header.cmd = UVC_CMD_UNPACK_IMG,
		.header.len = sizeof(uvcb),
		.guest_handle = kvm_s390_pv_get_handle(kvm),
		.gaddr = addr,
		.tweak[0] = tweak,
		.tweak[1] = offset,
	};
	int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
	unsigned long vmaddr;
	bool unlocked;

	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;

	if (ret == -ENXIO) {
		mmap_read_lock(kvm->mm);
		vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr));
		if (kvm_is_error_hva(vmaddr)) {
			ret = -EFAULT;
		} else {
			ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
			if (!ret)
				ret = __gmap_link(kvm->arch.gmap, addr, vmaddr);
		}
		mmap_read_unlock(kvm->mm);
		if (!ret)
			return -EAGAIN;
		return ret;
	}

	if (ret && ret != -EAGAIN)
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
			     uvcb.gaddr, *rc, *rrc);
	return ret;
}

int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
		       unsigned long tweak, u16 *rc, u16 *rrc)
{
	u64 offset = 0;
	int ret = 0;

	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
		return -EINVAL;

	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
		     addr, size);

	guard(srcu)(&kvm->srcu);

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (ret == -EAGAIN) {
			cond_resched();
			if (fatal_signal_pending(current))
				break;
			continue;
		}
		if (ret)
			break;
		addr += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	if (!ret)
		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
	return ret;
}

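/*
 * Illustrative sketch (editorial addition): the unpack loop above is driven
 * from userspace one image component at a time, e.g.
 *
 *	struct kvm_s390_pv_unp unp = {
 *		.addr = component_gaddr,   // page-aligned guest address
 *		.size = component_size,    // page-aligned length
 *		.tweak = component_tweak,  // tweak value shipped with the image
 *	};
 *	struct kvm_pv_cmd cmd = {
 *		.cmd = KVM_PV_UNPACK,
 *		.data = (__u64)&unp,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 *
 * The component_* values are placeholders; they come from the secure
 * execution image, not from KVM.
 */
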
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
	struct uv_cb_cpu_set_state uvcb = {
		.header.cmd = UVC_CMD_CPU_SET_STATE,
		.header.len = sizeof(uvcb),
		.cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
		.state = state,
	};
	int cc;

	cc = uv_call(0, (u64)&uvcb);
	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
	if (cc)
		return -EINVAL;
	return 0;
}

int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_cpu uvcb = {
		.header.cmd = UVC_CMD_DUMP_CPU,
		.header.len = sizeof(uvcb),
		.cpu_handle = vcpu->arch.pv.handle,
		.dump_area_origin = (u64)buff,
	};
	int cc;

	cc = uv_call_sched(0, (u64)&uvcb);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	return cc;
}

/* Size of the cache for the storage state dump data. 1MB for now */
#define DUMP_BUFF_LEN HPAGE_SIZE

/**
 * kvm_s390_pv_dump_stor_state
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *	   is requested.
 * @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_user_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). The gaddr pointer will be updated with the last address
 * for which data was written when returning to userspace. buff_user
 * might be written to even if an error rc is returned, for instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the cache fails
 *  -EINVAL if gaddr is not aligned to 1MB
 *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_stor_state uvcb = {
		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
		.header.len = sizeof(uvcb),
		.config_handle = kvm->arch.pv.handle,
		.gaddr = *gaddr,
		.dump_area_origin = 0,
	};
	const u64 increment_len = uv_info.conf_dump_storage_state_len;
	size_t buff_kvm_size;
	size_t size_done = 0;
	u8 *buff_kvm = NULL;
	int cc, ret;

	ret = -EINVAL;
	/* UV call processes 1MB guest storage chunks at a time */
	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
		goto out;

	/*
	 * We provide the storage state for 1MB chunks of guest
	 * storage. The buffer will need to be aligned to
	 * conf_dump_storage_state_len so we don't end on a partial
	 * chunk.
	 */
	if (!buff_user_len ||
	    !IS_ALIGNED(buff_user_len, increment_len))
		goto out;

	/*
	 * Allocate a buffer from which we will later copy to the user
	 * process. We don't want userspace to dictate our buffer size
	 * so we limit it to DUMP_BUFF_LEN.
	 */
	ret = -ENOMEM;
	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
	buff_kvm = vzalloc(buff_kvm_size);
	if (!buff_kvm)
		goto out;

	ret = 0;
	uvcb.dump_area_origin = (u64)buff_kvm;
	/* We will loop until the user buffer is filled or an error occurs */
	do {
		/* Get 1MB worth of guest storage state data */
		cc = uv_call_sched(0, (u64)&uvcb);

		/* All or nothing */
		if (cc) {
			ret = -EINVAL;
			break;
		}

		size_done += increment_len;
		uvcb.dump_area_origin += increment_len;
		buff_user_len -= increment_len;
		uvcb.gaddr += HPAGE_SIZE;

		/* KVM Buffer full, time to copy to the process */
		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
			if (copy_to_user(buff_user, buff_kvm, size_done)) {
				ret = -EFAULT;
				break;
			}

			buff_user += size_done;
			size_done = 0;
			uvcb.dump_area_origin = (u64)buff_kvm;
		}
	} while (buff_user_len);

	/* Report back where we ended dumping */
	*gaddr = uvcb.gaddr;

	/* Let's only log errors, we don't want to spam */
out:
	if (ret)
		KVM_UV_EVENT(kvm, 3,
			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
	*rc = uvcb.header.rc;
	*rrc = uvcb.header.rrc;
	vfree(buff_kvm);

	return ret;
}

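/*
 * Illustrative sketch (editorial addition): a configuration dump is driven
 * from userspace via KVM_S390_PV_COMMAND with cmd KVM_PV_DUMP and a
 * struct kvm_s390_pv_dmp payload, roughly in this order:
 *
 *	KVM_PV_DUMP_INIT               prepare the Ultravisor for dumping
 *	KVM_PV_DUMP_CONFIG_STOR_STATE  -> kvm_s390_pv_dump_stor_state(),
 *	                                  repeated until all guest storage
 *	                                  has been covered
 *	KVM_PV_DUMP_COMPLETE           -> kvm_s390_pv_dump_complete()
 *
 * The subcommand names above are taken from the KVM uapi headers; consult
 * Documentation/virt/kvm/api.rst for the authoritative flow.
 */
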
/**
 * kvm_s390_pv_dump_complete
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Completes the dumping operation and writes the completion data to
 * user space.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the completion buffer fails
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
			      u16 *rc, u16 *rrc)
{
	struct uv_cb_dump_complete complete = {
		.header.len = sizeof(complete),
		.header.cmd = UVC_CMD_DUMP_COMPLETE,
		.config_handle = kvm_s390_pv_get_handle(kvm),
	};
	u64 *compl_data;
	int ret;

	/* Allocate dump area */
	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
	if (!compl_data)
		return -ENOMEM;
	complete.dump_area_origin = (u64)compl_data;

	ret = uv_call_sched(0, (u64)&complete);
	*rc = complete.header.rc;
	*rrc = complete.header.rrc;
	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
		     complete.header.rc, complete.header.rrc);

	if (!ret) {
		/*
		 * kvm_s390_pv_dealloc_vm() will also (mem)set
		 * this to false on a reboot or other destroy
		 * operation for this vm.
		 */
		kvm->arch.pv.dumping = false;
		kvm_s390_vcpu_unblock_all(kvm);
		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
		if (ret)
			ret = -EFAULT;
	}
	vfree(compl_data);
	/* If the UVC returned an error, translate it to -EINVAL */
	if (ret > 0)
		ret = -EINVAL;
	return ret;
}