1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * s390 kvm PCI passthrough support 4 * 5 * Copyright IBM Corp. 2022 6 * 7 * Author(s): Matthew Rosato <mjrosato@linux.ibm.com> 8 */ 9 10 #include <linux/kvm_host.h> 11 #include <linux/pci.h> 12 #include <asm/pci.h> 13 #include <asm/pci_insn.h> 14 #include <asm/pci_io.h> 15 #include <asm/sclp.h> 16 #include "pci.h" 17 #include "kvm-s390.h" 18 19 struct zpci_aift *aift; 20 21 static inline int __set_irq_noiib(u16 ctl, u8 isc) 22 { 23 union zpci_sic_iib iib = {{0}}; 24 25 return zpci_set_irq_ctrl(ctl, isc, &iib); 26 } 27 28 void kvm_s390_pci_aen_exit(void) 29 { 30 unsigned long flags; 31 struct kvm_zdev **gait_kzdev; 32 33 lockdep_assert_held(&aift->aift_lock); 34 35 /* 36 * Contents of the aipb remain registered for the life of the host 37 * kernel, the information preserved in zpci_aipb and zpci_aif_sbv 38 * in case we insert the KVM module again later. Clear the AIFT 39 * information and free anything not registered with underlying 40 * firmware. 41 */ 42 spin_lock_irqsave(&aift->gait_lock, flags); 43 gait_kzdev = aift->kzdev; 44 aift->gait = NULL; 45 aift->sbv = NULL; 46 aift->kzdev = NULL; 47 spin_unlock_irqrestore(&aift->gait_lock, flags); 48 49 kfree(gait_kzdev); 50 } 51 52 static int zpci_setup_aipb(u8 nisc) 53 { 54 struct page *page; 55 int size, rc; 56 57 zpci_aipb = kzalloc_obj(union zpci_sic_iib); 58 if (!zpci_aipb) 59 return -ENOMEM; 60 61 aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL); 62 if (!aift->sbv) { 63 rc = -ENOMEM; 64 goto free_aipb; 65 } 66 zpci_aif_sbv = aift->sbv; 67 size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES * 68 sizeof(struct zpci_gaite))); 69 page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size); 70 if (!page) { 71 rc = -ENOMEM; 72 goto free_sbv; 73 } 74 aift->gait = (struct zpci_gaite *)page_to_virt(page); 75 76 zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector); 77 zpci_aipb->aipb.gait = virt_to_phys(aift->gait); 78 zpci_aipb->aipb.afi = nisc; 79 zpci_aipb->aipb.faal = ZPCI_NR_DEVICES; 80 81 /* Setup Adapter Event Notification Interpretation */ 82 if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) { 83 rc = -EIO; 84 goto free_gait; 85 } 86 87 return 0; 88 89 free_gait: 90 free_pages((unsigned long)aift->gait, size); 91 free_sbv: 92 airq_iv_release(aift->sbv); 93 zpci_aif_sbv = NULL; 94 free_aipb: 95 kfree(zpci_aipb); 96 zpci_aipb = NULL; 97 98 return rc; 99 } 100 101 static int zpci_reset_aipb(u8 nisc) 102 { 103 /* 104 * AEN registration can only happen once per system boot. If 105 * an aipb already exists then AEN was already registered and 106 * we can reuse the aipb contents. This can only happen if 107 * the KVM module was removed and re-inserted. However, we must 108 * ensure that the same forwarding ISC is used as this is assigned 109 * during KVM module load. 110 */ 111 if (zpci_aipb->aipb.afi != nisc) 112 return -EINVAL; 113 114 aift->sbv = zpci_aif_sbv; 115 aift->gait = phys_to_virt(zpci_aipb->aipb.gait); 116 117 return 0; 118 } 119 120 int kvm_s390_pci_aen_init(u8 nisc) 121 { 122 int rc = 0; 123 124 /* If already enabled for AEN, bail out now */ 125 if (aift->gait || aift->sbv) 126 return -EPERM; 127 128 mutex_lock(&aift->aift_lock); 129 aift->kzdev = kzalloc_objs(struct kvm_zdev *, ZPCI_NR_DEVICES); 130 if (!aift->kzdev) { 131 rc = -ENOMEM; 132 goto unlock; 133 } 134 135 if (!zpci_aipb) 136 rc = zpci_setup_aipb(nisc); 137 else 138 rc = zpci_reset_aipb(nisc); 139 if (rc) 140 goto free_zdev; 141 142 /* Enable floating IRQs */ 143 if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) { 144 rc = -EIO; 145 kvm_s390_pci_aen_exit(); 146 } 147 148 goto unlock; 149 150 free_zdev: 151 kfree(aift->kzdev); 152 unlock: 153 mutex_unlock(&aift->aift_lock); 154 return rc; 155 } 156 157 /* Modify PCI: Register floating adapter interruption forwarding */ 158 static int kvm_zpci_set_airq(struct zpci_dev *zdev) 159 { 160 u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); 161 struct zpci_fib fib = {}; 162 u8 status; 163 164 fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc; 165 fib.fmt0.sum = 1; /* enable summary notifications */ 166 fib.fmt0.noi = airq_iv_end(zdev->aibv); 167 fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector); 168 fib.fmt0.aibvo = 0; 169 fib.fmt0.aisb = virt_to_phys(aift->sbv->vector) + (zdev->aisb / 64) * 8; 170 fib.fmt0.aisbo = zdev->aisb & 63; 171 fib.gd = zdev->gisa; 172 173 return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; 174 } 175 176 /* Modify PCI: Unregister floating adapter interruption forwarding */ 177 static int kvm_zpci_clear_airq(struct zpci_dev *zdev) 178 { 179 u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT); 180 struct zpci_fib fib = {}; 181 u8 cc, status; 182 183 fib.gd = zdev->gisa; 184 185 cc = zpci_mod_fc(req, &fib, &status); 186 if (cc == 3 || (cc == 1 && status == 24)) 187 /* Function already gone or IRQs already deregistered. */ 188 cc = 0; 189 190 return cc ? -EIO : 0; 191 } 192 193 static inline void unaccount_mem(unsigned long nr_pages) 194 { 195 struct user_struct *user = get_uid(current_user()); 196 197 if (user) 198 atomic_long_sub(nr_pages, &user->locked_vm); 199 if (current->mm) 200 atomic64_sub(nr_pages, ¤t->mm->pinned_vm); 201 } 202 203 static inline int account_mem(unsigned long nr_pages) 204 { 205 struct user_struct *user = get_uid(current_user()); 206 unsigned long page_limit, cur_pages, new_pages; 207 208 page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 209 210 cur_pages = atomic_long_read(&user->locked_vm); 211 do { 212 new_pages = cur_pages + nr_pages; 213 if (new_pages > page_limit) 214 return -ENOMEM; 215 } while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages)); 216 217 atomic64_add(nr_pages, ¤t->mm->pinned_vm); 218 219 return 0; 220 } 221 222 static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib, 223 bool assist) 224 { 225 struct page *pages[1], *aibv_page, *aisb_page = NULL; 226 unsigned int msi_vecs, idx; 227 struct zpci_gaite *gaite; 228 unsigned long hva, bit; 229 struct kvm *kvm; 230 phys_addr_t gaddr; 231 int rc = 0, gisc, npages, pcount = 0; 232 233 /* 234 * Interrupt forwarding is only applicable if the device is already 235 * enabled for interpretation 236 */ 237 if (zdev->gisa == 0) 238 return -EINVAL; 239 240 kvm = zdev->kzdev->kvm; 241 msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi); 242 243 /* Get the associated forwarding ISC - if invalid, return the error */ 244 gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc); 245 if (gisc < 0) 246 return gisc; 247 248 /* Replace AIBV address */ 249 idx = srcu_read_lock(&kvm->srcu); 250 hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv)); 251 npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages); 252 srcu_read_unlock(&kvm->srcu, idx); 253 if (npages < 1) { 254 rc = -EIO; 255 goto out; 256 } 257 aibv_page = pages[0]; 258 pcount++; 259 gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK); 260 fib->fmt0.aibv = gaddr; 261 262 /* Pin the guest AISB if one was specified */ 263 if (fib->fmt0.sum == 1) { 264 idx = srcu_read_lock(&kvm->srcu); 265 hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb)); 266 npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, 267 pages); 268 srcu_read_unlock(&kvm->srcu, idx); 269 if (npages < 1) { 270 rc = -EIO; 271 goto unpin1; 272 } 273 aisb_page = pages[0]; 274 pcount++; 275 } 276 277 /* Account for pinned pages, roll back on failure */ 278 if (account_mem(pcount)) 279 goto unpin2; 280 281 /* AISB must be allocated before we can fill in GAITE */ 282 mutex_lock(&aift->aift_lock); 283 bit = airq_iv_alloc_bit(aift->sbv); 284 if (bit == -1UL) 285 goto unlock; 286 zdev->aisb = bit; /* store the summary bit number */ 287 zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | 288 AIRQ_IV_BITLOCK | 289 AIRQ_IV_GUESTVEC, 290 phys_to_virt(fib->fmt0.aibv)); 291 292 spin_lock_irq(&aift->gait_lock); 293 gaite = aift->gait + zdev->aisb; 294 295 /* If assist not requested, host will get all alerts */ 296 if (assist) 297 gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); 298 else 299 gaite->gisa = 0; 300 301 gaite->gisc = fib->fmt0.isc; 302 gaite->count++; 303 gaite->aisbo = fib->fmt0.aisbo; 304 gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb & 305 ~PAGE_MASK)); 306 aift->kzdev[zdev->aisb] = zdev->kzdev; 307 spin_unlock_irq(&aift->gait_lock); 308 309 /* Update guest FIB for re-issue */ 310 fib->fmt0.aisbo = zdev->aisb & 63; 311 fib->fmt0.aisb = virt_to_phys(aift->sbv->vector) + (zdev->aisb / 64) * 8; 312 fib->fmt0.isc = gisc; 313 314 /* Save some guest fib values in the host for later use */ 315 zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc; 316 zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv; 317 mutex_unlock(&aift->aift_lock); 318 319 /* Issue the clp to setup the irq now */ 320 rc = kvm_zpci_set_airq(zdev); 321 return rc; 322 323 unlock: 324 mutex_unlock(&aift->aift_lock); 325 unpin2: 326 if (fib->fmt0.sum == 1) 327 unpin_user_page(aisb_page); 328 unpin1: 329 unpin_user_page(aibv_page); 330 out: 331 return rc; 332 } 333 334 static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force) 335 { 336 struct kvm_zdev *kzdev = zdev->kzdev; 337 struct zpci_gaite *gaite; 338 struct page *vpage = NULL, *spage = NULL; 339 int rc, pcount = 0; 340 u8 isc; 341 342 if (zdev->gisa == 0) 343 return -EINVAL; 344 345 mutex_lock(&aift->aift_lock); 346 347 /* 348 * If the clear fails due to an error, leave now unless we know this 349 * device is about to go away (force) -- In that case clear the GAITE 350 * regardless. 351 */ 352 rc = kvm_zpci_clear_airq(zdev); 353 if (rc && !force) 354 goto out; 355 356 if (zdev->kzdev->fib.fmt0.aibv == 0) 357 goto out; 358 spin_lock_irq(&aift->gait_lock); 359 gaite = aift->gait + zdev->aisb; 360 isc = gaite->gisc; 361 gaite->count--; 362 if (gaite->count == 0) { 363 /* Release guest AIBV and AISB */ 364 vpage = phys_to_page(kzdev->fib.fmt0.aibv); 365 if (gaite->aisb != 0) 366 spage = phys_to_page(gaite->aisb); 367 /* Clear the GAIT entry */ 368 gaite->aisb = 0; 369 gaite->gisc = 0; 370 gaite->aisbo = 0; 371 gaite->gisa = 0; 372 aift->kzdev[zdev->aisb] = NULL; 373 /* Clear zdev info */ 374 airq_iv_free_bit(aift->sbv, zdev->aisb); 375 airq_iv_release(zdev->aibv); 376 zdev->aisb = 0; 377 zdev->aibv = NULL; 378 } 379 spin_unlock_irq(&aift->gait_lock); 380 kvm_s390_gisc_unregister(kzdev->kvm, isc); 381 kzdev->fib.fmt0.isc = 0; 382 kzdev->fib.fmt0.aibv = 0; 383 384 if (vpage) { 385 unpin_user_page(vpage); 386 pcount++; 387 } 388 if (spage) { 389 unpin_user_page(spage); 390 pcount++; 391 } 392 if (pcount > 0) 393 unaccount_mem(pcount); 394 out: 395 mutex_unlock(&aift->aift_lock); 396 397 return rc; 398 } 399 400 static int kvm_s390_pci_dev_open(struct zpci_dev *zdev) 401 { 402 struct kvm_zdev *kzdev; 403 404 kzdev = kzalloc_obj(struct kvm_zdev); 405 if (!kzdev) 406 return -ENOMEM; 407 408 kzdev->zdev = zdev; 409 zdev->kzdev = kzdev; 410 411 return 0; 412 } 413 414 static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) 415 { 416 struct kvm_zdev *kzdev; 417 418 kzdev = zdev->kzdev; 419 WARN_ON(kzdev->zdev != zdev); 420 zdev->kzdev = NULL; 421 kfree(kzdev); 422 } 423 424 425 /* 426 * Register device with the specified KVM. If interpretation facilities are 427 * available, enable them and let userspace indicate whether or not they will 428 * be used (specify SHM bit to disable). 429 */ 430 static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) 431 { 432 struct zpci_dev *zdev = opaque; 433 int rc; 434 435 if (!zdev) 436 return -EINVAL; 437 438 mutex_lock(&zdev->kzdev_lock); 439 440 if (zdev->kzdev || zdev->gisa != 0 || !kvm) { 441 mutex_unlock(&zdev->kzdev_lock); 442 return -EINVAL; 443 } 444 445 kvm_get_kvm(kvm); 446 447 mutex_lock(&kvm->lock); 448 449 rc = kvm_s390_pci_dev_open(zdev); 450 if (rc) 451 goto err; 452 453 /* 454 * If interpretation facilities aren't available, add the device to 455 * the kzdev list but don't enable for interpretation. 456 */ 457 if (!kvm_s390_pci_interp_allowed()) 458 goto out; 459 460 /* 461 * If this is the first request to use an interpreted device, make the 462 * necessary vcpu changes 463 */ 464 if (!kvm->arch.use_zpci_interp) 465 kvm_s390_vcpu_pci_enable_interp(kvm); 466 467 if (zdev_enabled(zdev)) { 468 rc = zpci_disable_device(zdev); 469 if (rc) 470 goto err; 471 } 472 473 /* 474 * Store information about the identity of the kvm guest allowed to 475 * access this device via interpretation to be used by host CLP 476 */ 477 zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); 478 479 rc = zpci_reenable_device(zdev); 480 if (rc) 481 goto clear_gisa; 482 483 out: 484 zdev->kzdev->kvm = kvm; 485 486 spin_lock(&kvm->arch.kzdev_list_lock); 487 list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list); 488 spin_unlock(&kvm->arch.kzdev_list_lock); 489 490 mutex_unlock(&kvm->lock); 491 mutex_unlock(&zdev->kzdev_lock); 492 return 0; 493 494 clear_gisa: 495 zdev->gisa = 0; 496 err: 497 if (zdev->kzdev) 498 kvm_s390_pci_dev_release(zdev); 499 mutex_unlock(&kvm->lock); 500 mutex_unlock(&zdev->kzdev_lock); 501 kvm_put_kvm(kvm); 502 return rc; 503 } 504 505 static void kvm_s390_pci_unregister_kvm(void *opaque) 506 { 507 struct zpci_dev *zdev = opaque; 508 struct kvm *kvm; 509 510 if (!zdev) 511 return; 512 513 mutex_lock(&zdev->kzdev_lock); 514 515 if (WARN_ON(!zdev->kzdev)) { 516 mutex_unlock(&zdev->kzdev_lock); 517 return; 518 } 519 520 kvm = zdev->kzdev->kvm; 521 mutex_lock(&kvm->lock); 522 523 /* 524 * A 0 gisa means interpretation was never enabled, just remove the 525 * device from the list. 526 */ 527 if (zdev->gisa == 0) 528 goto out; 529 530 /* Forwarding must be turned off before interpretation */ 531 if (zdev->kzdev->fib.fmt0.aibv != 0) 532 kvm_s390_pci_aif_disable(zdev, true); 533 534 /* Remove the host CLP guest designation */ 535 zdev->gisa = 0; 536 537 if (zdev_enabled(zdev)) { 538 if (zpci_disable_device(zdev)) 539 goto out; 540 } 541 542 zpci_reenable_device(zdev); 543 544 out: 545 spin_lock(&kvm->arch.kzdev_list_lock); 546 list_del(&zdev->kzdev->entry); 547 spin_unlock(&kvm->arch.kzdev_list_lock); 548 kvm_s390_pci_dev_release(zdev); 549 550 mutex_unlock(&kvm->lock); 551 mutex_unlock(&zdev->kzdev_lock); 552 553 kvm_put_kvm(kvm); 554 } 555 556 void kvm_s390_pci_init_list(struct kvm *kvm) 557 { 558 spin_lock_init(&kvm->arch.kzdev_list_lock); 559 INIT_LIST_HEAD(&kvm->arch.kzdev_list); 560 } 561 562 void kvm_s390_pci_clear_list(struct kvm *kvm) 563 { 564 /* 565 * This list should already be empty, either via vfio device closures 566 * or kvm fd cleanup. 567 */ 568 spin_lock(&kvm->arch.kzdev_list_lock); 569 WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list)); 570 spin_unlock(&kvm->arch.kzdev_list_lock); 571 } 572 573 static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh) 574 { 575 struct zpci_dev *zdev = NULL; 576 struct kvm_zdev *kzdev; 577 578 spin_lock(&kvm->arch.kzdev_list_lock); 579 list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) { 580 if (kzdev->zdev->fh == fh) { 581 zdev = kzdev->zdev; 582 break; 583 } 584 } 585 spin_unlock(&kvm->arch.kzdev_list_lock); 586 587 return zdev; 588 } 589 590 static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev, 591 struct kvm_s390_zpci_op *args) 592 { 593 struct zpci_fib fib = {}; 594 bool hostflag; 595 596 fib.fmt0.aibv = args->u.reg_aen.ibv; 597 fib.fmt0.isc = args->u.reg_aen.isc; 598 fib.fmt0.noi = args->u.reg_aen.noi; 599 if (args->u.reg_aen.sb != 0) { 600 fib.fmt0.aisb = args->u.reg_aen.sb; 601 fib.fmt0.aisbo = args->u.reg_aen.sbo; 602 fib.fmt0.sum = 1; 603 } else { 604 fib.fmt0.aisb = 0; 605 fib.fmt0.aisbo = 0; 606 fib.fmt0.sum = 0; 607 } 608 609 hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST); 610 return kvm_s390_pci_aif_enable(zdev, &fib, hostflag); 611 } 612 613 int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args) 614 { 615 struct kvm_zdev *kzdev; 616 struct zpci_dev *zdev; 617 int r; 618 619 zdev = get_zdev_from_kvm_by_fh(kvm, args->fh); 620 if (!zdev) 621 return -ENODEV; 622 623 mutex_lock(&zdev->kzdev_lock); 624 mutex_lock(&kvm->lock); 625 626 kzdev = zdev->kzdev; 627 if (!kzdev) { 628 r = -ENODEV; 629 goto out; 630 } 631 if (kzdev->kvm != kvm) { 632 r = -EPERM; 633 goto out; 634 } 635 636 switch (args->op) { 637 case KVM_S390_ZPCIOP_REG_AEN: 638 /* Fail on unknown flags */ 639 if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) { 640 r = -EINVAL; 641 break; 642 } 643 r = kvm_s390_pci_zpci_reg_aen(zdev, args); 644 break; 645 case KVM_S390_ZPCIOP_DEREG_AEN: 646 r = kvm_s390_pci_aif_disable(zdev, false); 647 break; 648 default: 649 r = -EINVAL; 650 } 651 652 out: 653 mutex_unlock(&kvm->lock); 654 mutex_unlock(&zdev->kzdev_lock); 655 return r; 656 } 657 658 int __init kvm_s390_pci_init(void) 659 { 660 zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm; 661 zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm; 662 663 if (!kvm_s390_pci_interp_allowed()) 664 return 0; 665 666 aift = kzalloc_obj(struct zpci_aift); 667 if (!aift) 668 return -ENOMEM; 669 670 spin_lock_init(&aift->gait_lock); 671 mutex_init(&aift->aift_lock); 672 673 return 0; 674 } 675 676 void kvm_s390_pci_exit(void) 677 { 678 zpci_kvm_hook.kvm_register = NULL; 679 zpci_kvm_hook.kvm_unregister = NULL; 680 681 if (!kvm_s390_pci_interp_allowed()) 682 return; 683 684 mutex_destroy(&aift->aift_lock); 685 686 kfree(aift); 687 } 688