1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * s390 kvm PCI passthrough support 4 * 5 * Copyright IBM Corp. 2022 6 * 7 * Author(s): Matthew Rosato <mjrosato@linux.ibm.com> 8 */ 9 10 #include <linux/kvm_host.h> 11 #include <linux/pci.h> 12 #include <asm/pci.h> 13 #include <asm/pci_insn.h> 14 #include <asm/pci_io.h> 15 #include <asm/sclp.h> 16 #include "pci.h" 17 #include "kvm-s390.h" 18 19 struct zpci_aift *aift; 20 21 static inline int __set_irq_noiib(u16 ctl, u8 isc) 22 { 23 union zpci_sic_iib iib = {{0}}; 24 25 return zpci_set_irq_ctrl(ctl, isc, &iib); 26 } 27 28 void kvm_s390_pci_aen_exit(void) 29 { 30 unsigned long flags; 31 struct kvm_zdev **gait_kzdev; 32 33 lockdep_assert_held(&aift->aift_lock); 34 35 /* 36 * Contents of the aipb remain registered for the life of the host 37 * kernel, the information preserved in zpci_aipb and zpci_aif_sbv 38 * in case we insert the KVM module again later. Clear the AIFT 39 * information and free anything not registered with underlying 40 * firmware. 41 */ 42 spin_lock_irqsave(&aift->gait_lock, flags); 43 gait_kzdev = aift->kzdev; 44 aift->gait = NULL; 45 aift->sbv = NULL; 46 aift->kzdev = NULL; 47 spin_unlock_irqrestore(&aift->gait_lock, flags); 48 49 kfree(gait_kzdev); 50 } 51 52 static int zpci_setup_aipb(u8 nisc) 53 { 54 struct page *page; 55 int size, rc; 56 57 zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL); 58 if (!zpci_aipb) 59 return -ENOMEM; 60 61 aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL); 62 if (!aift->sbv) { 63 rc = -ENOMEM; 64 goto free_aipb; 65 } 66 zpci_aif_sbv = aift->sbv; 67 size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES * 68 sizeof(struct zpci_gaite))); 69 page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size); 70 if (!page) { 71 rc = -ENOMEM; 72 goto free_sbv; 73 } 74 aift->gait = (struct zpci_gaite *)page_to_virt(page); 75 76 zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector); 77 zpci_aipb->aipb.gait = virt_to_phys(aift->gait); 78 zpci_aipb->aipb.afi = nisc; 79 zpci_aipb->aipb.faal = ZPCI_NR_DEVICES; 80 81 /* Setup Adapter Event Notification Interpretation */ 82 if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) { 83 rc = -EIO; 84 goto free_gait; 85 } 86 87 return 0; 88 89 free_gait: 90 free_pages((unsigned long)aift->gait, size); 91 free_sbv: 92 airq_iv_release(aift->sbv); 93 zpci_aif_sbv = NULL; 94 free_aipb: 95 kfree(zpci_aipb); 96 zpci_aipb = NULL; 97 98 return rc; 99 } 100 101 static int zpci_reset_aipb(u8 nisc) 102 { 103 /* 104 * AEN registration can only happen once per system boot. If 105 * an aipb already exists then AEN was already registered and 106 * we can reuse the aipb contents. This can only happen if 107 * the KVM module was removed and re-inserted. However, we must 108 * ensure that the same forwarding ISC is used as this is assigned 109 * during KVM module load. 110 */ 111 if (zpci_aipb->aipb.afi != nisc) 112 return -EINVAL; 113 114 aift->sbv = zpci_aif_sbv; 115 aift->gait = phys_to_virt(zpci_aipb->aipb.gait); 116 117 return 0; 118 } 119 120 int kvm_s390_pci_aen_init(u8 nisc) 121 { 122 int rc = 0; 123 124 /* If already enabled for AEN, bail out now */ 125 if (aift->gait || aift->sbv) 126 return -EPERM; 127 128 mutex_lock(&aift->aift_lock); 129 aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *), 130 GFP_KERNEL); 131 if (!aift->kzdev) { 132 rc = -ENOMEM; 133 goto unlock; 134 } 135 136 if (!zpci_aipb) 137 rc = zpci_setup_aipb(nisc); 138 else 139 rc = zpci_reset_aipb(nisc); 140 if (rc) 141 goto free_zdev; 142 143 /* Enable floating IRQs */ 144 if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) { 145 rc = -EIO; 146 kvm_s390_pci_aen_exit(); 147 } 148 149 goto unlock; 150 151 free_zdev: 152 kfree(aift->kzdev); 153 unlock: 154 mutex_unlock(&aift->aift_lock); 155 return rc; 156 } 157 158 /* Modify PCI: Register floating adapter interruption forwarding */ 159 static int kvm_zpci_set_airq(struct zpci_dev *zdev) 160 { 161 u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); 162 struct zpci_fib fib = {}; 163 u8 status; 164 165 fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc; 166 fib.fmt0.sum = 1; /* enable summary notifications */ 167 fib.fmt0.noi = airq_iv_end(zdev->aibv); 168 fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector); 169 fib.fmt0.aibvo = 0; 170 fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8); 171 fib.fmt0.aisbo = zdev->aisb & 63; 172 fib.gd = zdev->gisa; 173 174 return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; 175 } 176 177 /* Modify PCI: Unregister floating adapter interruption forwarding */ 178 static int kvm_zpci_clear_airq(struct zpci_dev *zdev) 179 { 180 u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT); 181 struct zpci_fib fib = {}; 182 u8 cc, status; 183 184 fib.gd = zdev->gisa; 185 186 cc = zpci_mod_fc(req, &fib, &status); 187 if (cc == 3 || (cc == 1 && status == 24)) 188 /* Function already gone or IRQs already deregistered. */ 189 cc = 0; 190 191 return cc ? -EIO : 0; 192 } 193 194 static inline void unaccount_mem(unsigned long nr_pages) 195 { 196 struct user_struct *user = get_uid(current_user()); 197 198 if (user) 199 atomic_long_sub(nr_pages, &user->locked_vm); 200 if (current->mm) 201 atomic64_sub(nr_pages, ¤t->mm->pinned_vm); 202 } 203 204 static inline int account_mem(unsigned long nr_pages) 205 { 206 struct user_struct *user = get_uid(current_user()); 207 unsigned long page_limit, cur_pages, new_pages; 208 209 page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 210 211 cur_pages = atomic_long_read(&user->locked_vm); 212 do { 213 new_pages = cur_pages + nr_pages; 214 if (new_pages > page_limit) 215 return -ENOMEM; 216 } while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages)); 217 218 atomic64_add(nr_pages, ¤t->mm->pinned_vm); 219 220 return 0; 221 } 222 223 static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib, 224 bool assist) 225 { 226 struct page *pages[1], *aibv_page, *aisb_page = NULL; 227 unsigned int msi_vecs, idx; 228 struct zpci_gaite *gaite; 229 unsigned long hva, bit; 230 struct kvm *kvm; 231 phys_addr_t gaddr; 232 int rc = 0, gisc, npages, pcount = 0; 233 234 /* 235 * Interrupt forwarding is only applicable if the device is already 236 * enabled for interpretation 237 */ 238 if (zdev->gisa == 0) 239 return -EINVAL; 240 241 kvm = zdev->kzdev->kvm; 242 msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi); 243 244 /* Get the associated forwarding ISC - if invalid, return the error */ 245 gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc); 246 if (gisc < 0) 247 return gisc; 248 249 /* Replace AIBV address */ 250 idx = srcu_read_lock(&kvm->srcu); 251 hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv)); 252 npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages); 253 srcu_read_unlock(&kvm->srcu, idx); 254 if (npages < 1) { 255 rc = -EIO; 256 goto out; 257 } 258 aibv_page = pages[0]; 259 pcount++; 260 gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK); 261 fib->fmt0.aibv = gaddr; 262 263 /* Pin the guest AISB if one was specified */ 264 if (fib->fmt0.sum == 1) { 265 idx = srcu_read_lock(&kvm->srcu); 266 hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb)); 267 npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, 268 pages); 269 srcu_read_unlock(&kvm->srcu, idx); 270 if (npages < 1) { 271 rc = -EIO; 272 goto unpin1; 273 } 274 aisb_page = pages[0]; 275 pcount++; 276 } 277 278 /* Account for pinned pages, roll back on failure */ 279 if (account_mem(pcount)) 280 goto unpin2; 281 282 /* AISB must be allocated before we can fill in GAITE */ 283 mutex_lock(&aift->aift_lock); 284 bit = airq_iv_alloc_bit(aift->sbv); 285 if (bit == -1UL) 286 goto unlock; 287 zdev->aisb = bit; /* store the summary bit number */ 288 zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | 289 AIRQ_IV_BITLOCK | 290 AIRQ_IV_GUESTVEC, 291 phys_to_virt(fib->fmt0.aibv)); 292 293 spin_lock_irq(&aift->gait_lock); 294 gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb * 295 sizeof(struct zpci_gaite)); 296 297 /* If assist not requested, host will get all alerts */ 298 if (assist) 299 gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); 300 else 301 gaite->gisa = 0; 302 303 gaite->gisc = fib->fmt0.isc; 304 gaite->count++; 305 gaite->aisbo = fib->fmt0.aisbo; 306 gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb & 307 ~PAGE_MASK)); 308 aift->kzdev[zdev->aisb] = zdev->kzdev; 309 spin_unlock_irq(&aift->gait_lock); 310 311 /* Update guest FIB for re-issue */ 312 fib->fmt0.aisbo = zdev->aisb & 63; 313 fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8); 314 fib->fmt0.isc = gisc; 315 316 /* Save some guest fib values in the host for later use */ 317 zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc; 318 zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv; 319 mutex_unlock(&aift->aift_lock); 320 321 /* Issue the clp to setup the irq now */ 322 rc = kvm_zpci_set_airq(zdev); 323 return rc; 324 325 unlock: 326 mutex_unlock(&aift->aift_lock); 327 unpin2: 328 if (fib->fmt0.sum == 1) 329 unpin_user_page(aisb_page); 330 unpin1: 331 unpin_user_page(aibv_page); 332 out: 333 return rc; 334 } 335 336 static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force) 337 { 338 struct kvm_zdev *kzdev = zdev->kzdev; 339 struct zpci_gaite *gaite; 340 struct page *vpage = NULL, *spage = NULL; 341 int rc, pcount = 0; 342 u8 isc; 343 344 if (zdev->gisa == 0) 345 return -EINVAL; 346 347 mutex_lock(&aift->aift_lock); 348 349 /* 350 * If the clear fails due to an error, leave now unless we know this 351 * device is about to go away (force) -- In that case clear the GAITE 352 * regardless. 353 */ 354 rc = kvm_zpci_clear_airq(zdev); 355 if (rc && !force) 356 goto out; 357 358 if (zdev->kzdev->fib.fmt0.aibv == 0) 359 goto out; 360 spin_lock_irq(&aift->gait_lock); 361 gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb * 362 sizeof(struct zpci_gaite)); 363 isc = gaite->gisc; 364 gaite->count--; 365 if (gaite->count == 0) { 366 /* Release guest AIBV and AISB */ 367 vpage = phys_to_page(kzdev->fib.fmt0.aibv); 368 if (gaite->aisb != 0) 369 spage = phys_to_page(gaite->aisb); 370 /* Clear the GAIT entry */ 371 gaite->aisb = 0; 372 gaite->gisc = 0; 373 gaite->aisbo = 0; 374 gaite->gisa = 0; 375 aift->kzdev[zdev->aisb] = NULL; 376 /* Clear zdev info */ 377 airq_iv_free_bit(aift->sbv, zdev->aisb); 378 airq_iv_release(zdev->aibv); 379 zdev->aisb = 0; 380 zdev->aibv = NULL; 381 } 382 spin_unlock_irq(&aift->gait_lock); 383 kvm_s390_gisc_unregister(kzdev->kvm, isc); 384 kzdev->fib.fmt0.isc = 0; 385 kzdev->fib.fmt0.aibv = 0; 386 387 if (vpage) { 388 unpin_user_page(vpage); 389 pcount++; 390 } 391 if (spage) { 392 unpin_user_page(spage); 393 pcount++; 394 } 395 if (pcount > 0) 396 unaccount_mem(pcount); 397 out: 398 mutex_unlock(&aift->aift_lock); 399 400 return rc; 401 } 402 403 static int kvm_s390_pci_dev_open(struct zpci_dev *zdev) 404 { 405 struct kvm_zdev *kzdev; 406 407 kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL); 408 if (!kzdev) 409 return -ENOMEM; 410 411 kzdev->zdev = zdev; 412 zdev->kzdev = kzdev; 413 414 return 0; 415 } 416 417 static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) 418 { 419 struct kvm_zdev *kzdev; 420 421 kzdev = zdev->kzdev; 422 WARN_ON(kzdev->zdev != zdev); 423 zdev->kzdev = NULL; 424 kfree(kzdev); 425 } 426 427 428 /* 429 * Register device with the specified KVM. If interpretation facilities are 430 * available, enable them and let userspace indicate whether or not they will 431 * be used (specify SHM bit to disable). 432 */ 433 static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) 434 { 435 struct zpci_dev *zdev = opaque; 436 int rc; 437 438 if (!zdev) 439 return -EINVAL; 440 441 mutex_lock(&zdev->kzdev_lock); 442 443 if (zdev->kzdev || zdev->gisa != 0 || !kvm) { 444 mutex_unlock(&zdev->kzdev_lock); 445 return -EINVAL; 446 } 447 448 kvm_get_kvm(kvm); 449 450 mutex_lock(&kvm->lock); 451 452 rc = kvm_s390_pci_dev_open(zdev); 453 if (rc) 454 goto err; 455 456 /* 457 * If interpretation facilities aren't available, add the device to 458 * the kzdev list but don't enable for interpretation. 459 */ 460 if (!kvm_s390_pci_interp_allowed()) 461 goto out; 462 463 /* 464 * If this is the first request to use an interpreted device, make the 465 * necessary vcpu changes 466 */ 467 if (!kvm->arch.use_zpci_interp) 468 kvm_s390_vcpu_pci_enable_interp(kvm); 469 470 if (zdev_enabled(zdev)) { 471 rc = zpci_disable_device(zdev); 472 if (rc) 473 goto err; 474 } 475 476 /* 477 * Store information about the identity of the kvm guest allowed to 478 * access this device via interpretation to be used by host CLP 479 */ 480 zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); 481 482 rc = zpci_reenable_device(zdev); 483 if (rc) 484 goto clear_gisa; 485 486 out: 487 zdev->kzdev->kvm = kvm; 488 489 spin_lock(&kvm->arch.kzdev_list_lock); 490 list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list); 491 spin_unlock(&kvm->arch.kzdev_list_lock); 492 493 mutex_unlock(&kvm->lock); 494 mutex_unlock(&zdev->kzdev_lock); 495 return 0; 496 497 clear_gisa: 498 zdev->gisa = 0; 499 err: 500 if (zdev->kzdev) 501 kvm_s390_pci_dev_release(zdev); 502 mutex_unlock(&kvm->lock); 503 mutex_unlock(&zdev->kzdev_lock); 504 kvm_put_kvm(kvm); 505 return rc; 506 } 507 508 static void kvm_s390_pci_unregister_kvm(void *opaque) 509 { 510 struct zpci_dev *zdev = opaque; 511 struct kvm *kvm; 512 513 if (!zdev) 514 return; 515 516 mutex_lock(&zdev->kzdev_lock); 517 518 if (WARN_ON(!zdev->kzdev)) { 519 mutex_unlock(&zdev->kzdev_lock); 520 return; 521 } 522 523 kvm = zdev->kzdev->kvm; 524 mutex_lock(&kvm->lock); 525 526 /* 527 * A 0 gisa means interpretation was never enabled, just remove the 528 * device from the list. 529 */ 530 if (zdev->gisa == 0) 531 goto out; 532 533 /* Forwarding must be turned off before interpretation */ 534 if (zdev->kzdev->fib.fmt0.aibv != 0) 535 kvm_s390_pci_aif_disable(zdev, true); 536 537 /* Remove the host CLP guest designation */ 538 zdev->gisa = 0; 539 540 if (zdev_enabled(zdev)) { 541 if (zpci_disable_device(zdev)) 542 goto out; 543 } 544 545 zpci_reenable_device(zdev); 546 547 out: 548 spin_lock(&kvm->arch.kzdev_list_lock); 549 list_del(&zdev->kzdev->entry); 550 spin_unlock(&kvm->arch.kzdev_list_lock); 551 kvm_s390_pci_dev_release(zdev); 552 553 mutex_unlock(&kvm->lock); 554 mutex_unlock(&zdev->kzdev_lock); 555 556 kvm_put_kvm(kvm); 557 } 558 559 void kvm_s390_pci_init_list(struct kvm *kvm) 560 { 561 spin_lock_init(&kvm->arch.kzdev_list_lock); 562 INIT_LIST_HEAD(&kvm->arch.kzdev_list); 563 } 564 565 void kvm_s390_pci_clear_list(struct kvm *kvm) 566 { 567 /* 568 * This list should already be empty, either via vfio device closures 569 * or kvm fd cleanup. 570 */ 571 spin_lock(&kvm->arch.kzdev_list_lock); 572 WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list)); 573 spin_unlock(&kvm->arch.kzdev_list_lock); 574 } 575 576 static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh) 577 { 578 struct zpci_dev *zdev = NULL; 579 struct kvm_zdev *kzdev; 580 581 spin_lock(&kvm->arch.kzdev_list_lock); 582 list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) { 583 if (kzdev->zdev->fh == fh) { 584 zdev = kzdev->zdev; 585 break; 586 } 587 } 588 spin_unlock(&kvm->arch.kzdev_list_lock); 589 590 return zdev; 591 } 592 593 static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev, 594 struct kvm_s390_zpci_op *args) 595 { 596 struct zpci_fib fib = {}; 597 bool hostflag; 598 599 fib.fmt0.aibv = args->u.reg_aen.ibv; 600 fib.fmt0.isc = args->u.reg_aen.isc; 601 fib.fmt0.noi = args->u.reg_aen.noi; 602 if (args->u.reg_aen.sb != 0) { 603 fib.fmt0.aisb = args->u.reg_aen.sb; 604 fib.fmt0.aisbo = args->u.reg_aen.sbo; 605 fib.fmt0.sum = 1; 606 } else { 607 fib.fmt0.aisb = 0; 608 fib.fmt0.aisbo = 0; 609 fib.fmt0.sum = 0; 610 } 611 612 hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST); 613 return kvm_s390_pci_aif_enable(zdev, &fib, hostflag); 614 } 615 616 int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args) 617 { 618 struct kvm_zdev *kzdev; 619 struct zpci_dev *zdev; 620 int r; 621 622 zdev = get_zdev_from_kvm_by_fh(kvm, args->fh); 623 if (!zdev) 624 return -ENODEV; 625 626 mutex_lock(&zdev->kzdev_lock); 627 mutex_lock(&kvm->lock); 628 629 kzdev = zdev->kzdev; 630 if (!kzdev) { 631 r = -ENODEV; 632 goto out; 633 } 634 if (kzdev->kvm != kvm) { 635 r = -EPERM; 636 goto out; 637 } 638 639 switch (args->op) { 640 case KVM_S390_ZPCIOP_REG_AEN: 641 /* Fail on unknown flags */ 642 if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) { 643 r = -EINVAL; 644 break; 645 } 646 r = kvm_s390_pci_zpci_reg_aen(zdev, args); 647 break; 648 case KVM_S390_ZPCIOP_DEREG_AEN: 649 r = kvm_s390_pci_aif_disable(zdev, false); 650 break; 651 default: 652 r = -EINVAL; 653 } 654 655 out: 656 mutex_unlock(&kvm->lock); 657 mutex_unlock(&zdev->kzdev_lock); 658 return r; 659 } 660 661 int __init kvm_s390_pci_init(void) 662 { 663 zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm; 664 zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm; 665 666 if (!kvm_s390_pci_interp_allowed()) 667 return 0; 668 669 aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL); 670 if (!aift) 671 return -ENOMEM; 672 673 spin_lock_init(&aift->gait_lock); 674 mutex_init(&aift->aift_lock); 675 676 return 0; 677 } 678 679 void kvm_s390_pci_exit(void) 680 { 681 zpci_kvm_hook.kvm_register = NULL; 682 zpci_kvm_hook.kvm_unregister = NULL; 683 684 if (!kvm_s390_pci_interp_allowed()) 685 return; 686 687 mutex_destroy(&aift->aift_lock); 688 689 kfree(aift); 690 } 691