// SPDX-License-Identifier: GPL-2.0
/*
 * s390 kvm PCI passthrough support
 *
 * Copyright IBM Corp. 2022
 *
 * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
 */

#include <linux/kvm_host.h>
#include <linux/pci.h>
#include <asm/pci.h>
#include <asm/pci_insn.h>
#include <asm/pci_io.h>
#include <asm/sclp.h>
#include "pci.h"
#include "kvm-s390.h"

struct zpci_aift *aift;

static inline int __set_irq_noiib(u16 ctl, u8 isc)
{
	union zpci_sic_iib iib = {{0}};

	return zpci_set_irq_ctrl(ctl, isc, &iib);
}

void kvm_s390_pci_aen_exit(void)
{
	unsigned long flags;
	struct kvm_zdev **gait_kzdev;

	lockdep_assert_held(&aift->aift_lock);

	/*
	 * Contents of the aipb remain registered for the life of the host
	 * kernel, the information preserved in zpci_aipb and zpci_aif_sbv
	 * in case we insert the KVM module again later.  Clear the AIFT
	 * information and free anything not registered with underlying
	 * firmware.
	 */
	spin_lock_irqsave(&aift->gait_lock, flags);
	gait_kzdev = aift->kzdev;
	aift->gait = NULL;
	aift->sbv = NULL;
	aift->kzdev = NULL;
	spin_unlock_irqrestore(&aift->gait_lock, flags);

	kfree(gait_kzdev);
}

static int zpci_setup_aipb(u8 nisc)
{
	struct page *page;
	int size, rc;

	zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL);
	if (!zpci_aipb)
		return -ENOMEM;

	aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
	if (!aift->sbv) {
		rc = -ENOMEM;
		goto free_aipb;
	}
	zpci_aif_sbv = aift->sbv;
	size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES *
				    sizeof(struct zpci_gaite)));
	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size);
	if (!page) {
		rc = -ENOMEM;
		goto free_sbv;
	}
	aift->gait = (struct zpci_gaite *)page_to_virt(page);

	zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector);
	zpci_aipb->aipb.gait = virt_to_phys(aift->gait);
	zpci_aipb->aipb.afi = nisc;
	zpci_aipb->aipb.faal = ZPCI_NR_DEVICES;

	/* Setup Adapter Event Notification Interpretation */
	if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) {
		rc = -EIO;
		goto free_gait;
	}

	return 0;

free_gait:
	free_pages((unsigned long)aift->gait, size);
free_sbv:
	airq_iv_release(aift->sbv);
	zpci_aif_sbv = NULL;
free_aipb:
	kfree(zpci_aipb);
	zpci_aipb = NULL;

	return rc;
}
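/*
 * A sketch of how the AEN structures registered above relate (derived
 * from the code in this file, not from the architecture documents): the
 * summary bit number n handed out by airq_iv_alloc_bit() is used as the
 * single lookup key everywhere:
 *
 *	aift->sbv	- bit n is set by firmware on an adapter event
 *	aift->gait	- the GAITE derived from n says where/how to
 *			  inject the event into the guest (gisa, gisc)
 *	aift->kzdev[n]	- back-pointer to the owning kvm_zdev, used by
 *			  the host when it must forward the alert itself
 */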
static int zpci_reset_aipb(u8 nisc)
{
	/*
	 * AEN registration can only happen once per system boot.  If
	 * an aipb already exists then AEN was already registered and
	 * we can reuse the aipb contents.  This can only happen if
	 * the KVM module was removed and re-inserted.  However, we must
	 * ensure that the same forwarding ISC is used as this is assigned
	 * during KVM module load.
	 */
	if (zpci_aipb->aipb.afi != nisc)
		return -EINVAL;

	aift->sbv = zpci_aif_sbv;
	aift->gait = phys_to_virt(zpci_aipb->aipb.gait);

	return 0;
}

int kvm_s390_pci_aen_init(u8 nisc)
{
	int rc = 0;

	/* If already enabled for AEN, bail out now */
	if (aift->gait || aift->sbv)
		return -EPERM;

	mutex_lock(&aift->aift_lock);
	aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *),
			      GFP_KERNEL);
	if (!aift->kzdev) {
		rc = -ENOMEM;
		goto unlock;
	}

	if (!zpci_aipb)
		rc = zpci_setup_aipb(nisc);
	else
		rc = zpci_reset_aipb(nisc);
	if (rc)
		goto free_zdev;

	/* Enable floating IRQs */
	if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) {
		rc = -EIO;
		kvm_s390_pci_aen_exit();
	}
	goto unlock;

free_zdev:
	kfree(aift->kzdev);
unlock:
	mutex_unlock(&aift->aift_lock);
	return rc;
}

/* Modify PCI: Register floating adapter interruption forwarding */
static int kvm_zpci_set_airq(struct zpci_dev *zdev)
{
	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
	struct zpci_fib fib = {};
	u8 status;

	fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
	fib.fmt0.sum = 1;	/* enable summary notifications */
	fib.fmt0.noi = airq_iv_end(zdev->aibv);
	fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
	fib.fmt0.aibvo = 0;
	fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
	fib.fmt0.aisbo = zdev->aisb & 63;
	fib.gd = zdev->gisa;

	return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}

/* Modify PCI: Unregister floating adapter interruption forwarding */
static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
{
	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
	struct zpci_fib fib = {};
	u8 cc, status;

	fib.gd = zdev->gisa;
	cc = zpci_mod_fc(req, &fib, &status);
	if (cc == 3 || (cc == 1 && status == 24))
		/* Function already gone or IRQs already deregistered. */
		cc = 0;

	return cc ? -EIO : 0;
}

static inline void unaccount_mem(unsigned long nr_pages)
{
	struct user_struct *user = get_uid(current_user());

	if (user)
		atomic_long_sub(nr_pages, &user->locked_vm);
	if (current->mm)
		atomic64_sub(nr_pages, &current->mm->pinned_vm);
}

static inline int account_mem(unsigned long nr_pages)
{
	struct user_struct *user = get_uid(current_user());
	unsigned long page_limit, cur_pages, new_pages;

	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	cur_pages = atomic_long_read(&user->locked_vm);
	do {
		new_pages = cur_pages + nr_pages;
		if (new_pages > page_limit)
			return -ENOMEM;
	} while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages,
					  new_pages));

	atomic64_add(nr_pages, &current->mm->pinned_vm);

	return 0;
}
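/*
 * Worked example for the accounting above (illustrative numbers only):
 * with RLIMIT_MEMLOCK at 64 KiB and 4 KiB pages, page_limit is 16.  If
 * locked_vm already holds 15 pages, account_mem(1) succeeds while
 * account_mem(2) fails with -ENOMEM.  The cmpxchg loop re-reads
 * locked_vm whenever another task raced in, so concurrent callers
 * cannot jointly exceed the limit.
 */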
static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
				   bool assist)
{
	struct page *pages[1], *aibv_page, *aisb_page = NULL;
	unsigned int msi_vecs, idx;
	struct zpci_gaite *gaite;
	unsigned long hva, bit;
	struct kvm *kvm;
	phys_addr_t gaddr;
	int rc = 0, gisc, npages, pcount = 0;

	/*
	 * Interrupt forwarding is only applicable if the device is already
	 * enabled for interpretation
	 */
	if (zdev->gisa == 0)
		return -EINVAL;

	kvm = zdev->kzdev->kvm;
	msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);

	/* Get the associated forwarding ISC - if invalid, return the error */
	gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc);
	if (gisc < 0)
		return gisc;

	/* Replace AIBV address */
	idx = srcu_read_lock(&kvm->srcu);
	hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
	npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages);
	srcu_read_unlock(&kvm->srcu, idx);
	if (npages < 1) {
		rc = -EIO;
		goto out;
	}
	aibv_page = pages[0];
	pcount++;
	gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
	fib->fmt0.aibv = gaddr;

	/* Pin the guest AISB if one was specified */
	if (fib->fmt0.sum == 1) {
		idx = srcu_read_lock(&kvm->srcu);
		hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
		npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM,
					     pages);
		srcu_read_unlock(&kvm->srcu, idx);
		if (npages < 1) {
			rc = -EIO;
			goto unpin1;
		}
		aisb_page = pages[0];
		pcount++;
	}

	/* Account for pinned pages, roll back on failure */
	if (account_mem(pcount)) {
		rc = -ENOMEM;
		goto unpin2;
	}

	/* AISB must be allocated before we can fill in GAITE */
	mutex_lock(&aift->aift_lock);
	bit = airq_iv_alloc_bit(aift->sbv);
	if (bit == -1UL) {
		rc = -EIO;
		goto unlock;
	}
	zdev->aisb = bit; /* store the summary bit number */
	zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
				    AIRQ_IV_BITLOCK |
				    AIRQ_IV_GUESTVEC,
				    phys_to_virt(fib->fmt0.aibv));
	if (!zdev->aibv) {
		rc = -ENOMEM;
		airq_iv_free_bit(aift->sbv, zdev->aisb);
		goto unlock;
	}

	spin_lock_irq(&aift->gait_lock);
	gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
						   sizeof(struct zpci_gaite));

	/* If assist not requested, host will get all alerts */
	if (assist)
		gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
	else
		gaite->gisa = 0;

	gaite->gisc = fib->fmt0.isc;
	gaite->count++;
	gaite->aisbo = fib->fmt0.aisbo;
	gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb &
							      ~PAGE_MASK));
	aift->kzdev[zdev->aisb] = zdev->kzdev;
	spin_unlock_irq(&aift->gait_lock);

	/* Update guest FIB for re-issue */
	fib->fmt0.aisbo = zdev->aisb & 63;
	fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
	fib->fmt0.isc = gisc;

	/* Save some guest fib values in the host for later use */
	zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
	zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
	mutex_unlock(&aift->aift_lock);

	/* Issue the clp to setup the irq now */
	rc = kvm_zpci_set_airq(zdev);
	return rc;

unlock:
	mutex_unlock(&aift->aift_lock);
	unaccount_mem(pcount);
unpin2:
	if (fib->fmt0.sum == 1)
		unpin_user_page(aisb_page);
unpin1:
	unpin_user_page(aibv_page);
out:
	return rc;
}
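/*
 * Note on the unwind above: the error labels fall through in reverse
 * order of acquisition - drop the aift_lock and the memlock accounting,
 * then unpin the guest AISB page, then the guest AIBV page - so a
 * failure part-way through enable leaves no pinned or accounted pages
 * behind.  A failure at an earlier step simply enters the chain at a
 * later label, skipping the unwind for resources never acquired.
 */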
static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
{
	struct kvm_zdev *kzdev = zdev->kzdev;
	struct zpci_gaite *gaite;
	struct page *vpage = NULL, *spage = NULL;
	int rc, pcount = 0;
	u8 isc;

	if (zdev->gisa == 0)
		return -EINVAL;

	mutex_lock(&aift->aift_lock);

	/*
	 * If the clear fails due to an error, leave now unless we know this
	 * device is about to go away (force) -- In that case clear the GAITE
	 * regardless.
	 */
	rc = kvm_zpci_clear_airq(zdev);
	if (rc && !force)
		goto out;

	if (zdev->kzdev->fib.fmt0.aibv == 0)
		goto out;
	spin_lock_irq(&aift->gait_lock);
	gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
						   sizeof(struct zpci_gaite));
	isc = gaite->gisc;
	gaite->count--;
	if (gaite->count == 0) {
		/* Release guest AIBV and AISB */
		vpage = phys_to_page(kzdev->fib.fmt0.aibv);
		if (gaite->aisb != 0)
			spage = phys_to_page(gaite->aisb);
		/* Clear the GAIT entry */
		gaite->aisb = 0;
		gaite->gisc = 0;
		gaite->aisbo = 0;
		gaite->gisa = 0;
		aift->kzdev[zdev->aisb] = NULL;
		/* Clear zdev info */
		airq_iv_free_bit(aift->sbv, zdev->aisb);
		airq_iv_release(zdev->aibv);
		zdev->aisb = 0;
		zdev->aibv = NULL;
	}
	spin_unlock_irq(&aift->gait_lock);
	kvm_s390_gisc_unregister(kzdev->kvm, isc);
	kzdev->fib.fmt0.isc = 0;
	kzdev->fib.fmt0.aibv = 0;

	if (vpage) {
		unpin_user_page(vpage);
		pcount++;
	}
	if (spage) {
		unpin_user_page(spage);
		pcount++;
	}
	if (pcount > 0)
		unaccount_mem(pcount);
out:
	mutex_unlock(&aift->aift_lock);

	return rc;
}

static int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
{
	struct kvm_zdev *kzdev;

	kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL);
	if (!kzdev)
		return -ENOMEM;

	kzdev->zdev = zdev;
	zdev->kzdev = kzdev;

	return 0;
}

static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
{
	struct kvm_zdev *kzdev;

	kzdev = zdev->kzdev;
	WARN_ON(kzdev->zdev != zdev);
	zdev->kzdev = NULL;
	kfree(kzdev);
}
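/*
 * The register/unregister functions below are not called directly from
 * within kvm; kvm_s390_pci_init() publishes them through zpci_kvm_hook,
 * and the vfio zdev code invokes them when a passthrough device becomes
 * associated with (or is removed from) a KVM, e.g. when userspace opens
 * or closes the vfio device fd.
 */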
/*
 * Register device with the specified KVM. If interpretation facilities are
 * available, enable them and let userspace indicate whether or not they will
 * be used (specify SHM bit to disable).
 */
static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
{
	struct zpci_dev *zdev = opaque;
	u8 status;
	int rc;

	if (!zdev)
		return -EINVAL;

	mutex_lock(&zdev->kzdev_lock);

	if (zdev->kzdev || zdev->gisa != 0 || !kvm) {
		mutex_unlock(&zdev->kzdev_lock);
		return -EINVAL;
	}

	kvm_get_kvm(kvm);

	mutex_lock(&kvm->lock);

	rc = kvm_s390_pci_dev_open(zdev);
	if (rc)
		goto err;

	/*
	 * If interpretation facilities aren't available, add the device to
	 * the kzdev list but don't enable for interpretation.
	 */
	if (!kvm_s390_pci_interp_allowed())
		goto out;

	/*
	 * If this is the first request to use an interpreted device, make the
	 * necessary vcpu changes
	 */
	if (!kvm->arch.use_zpci_interp)
		kvm_s390_vcpu_pci_enable_interp(kvm);

	if (zdev_enabled(zdev)) {
		rc = zpci_disable_device(zdev);
		if (rc)
			goto err;
	}

	/*
	 * Store information about the identity of the kvm guest allowed to
	 * access this device via interpretation to be used by host CLP
	 */
	zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);

	rc = zpci_enable_device(zdev);
	if (rc)
		goto clear_gisa;

	/* Re-register the IOMMU that was already created */
	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				virt_to_phys(zdev->dma_table), &status);
	if (rc)
		goto clear_gisa;

out:
	zdev->kzdev->kvm = kvm;

	spin_lock(&kvm->arch.kzdev_list_lock);
	list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
	spin_unlock(&kvm->arch.kzdev_list_lock);

	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);
	return 0;

clear_gisa:
	zdev->gisa = 0;
err:
	if (zdev->kzdev)
		kvm_s390_pci_dev_release(zdev);
	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);
	kvm_put_kvm(kvm);
	return rc;
}

static void kvm_s390_pci_unregister_kvm(void *opaque)
{
	struct zpci_dev *zdev = opaque;
	struct kvm *kvm;
	u8 status;

	if (!zdev)
		return;

	mutex_lock(&zdev->kzdev_lock);

	if (WARN_ON(!zdev->kzdev)) {
		mutex_unlock(&zdev->kzdev_lock);
		return;
	}

	kvm = zdev->kzdev->kvm;
	mutex_lock(&kvm->lock);

	/*
	 * A 0 gisa means interpretation was never enabled, just remove the
	 * device from the list.
	 */
	if (zdev->gisa == 0)
		goto out;

	/* Forwarding must be turned off before interpretation */
	if (zdev->kzdev->fib.fmt0.aibv != 0)
		kvm_s390_pci_aif_disable(zdev, true);

	/* Remove the host CLP guest designation */
	zdev->gisa = 0;

	if (zdev_enabled(zdev)) {
		if (zpci_disable_device(zdev))
			goto out;
	}

	if (zpci_enable_device(zdev))
		goto out;

	/* Re-register the IOMMU that was already created */
	zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
			   virt_to_phys(zdev->dma_table), &status);

out:
	spin_lock(&kvm->arch.kzdev_list_lock);
	list_del(&zdev->kzdev->entry);
	spin_unlock(&kvm->arch.kzdev_list_lock);
	kvm_s390_pci_dev_release(zdev);

	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);

	kvm_put_kvm(kvm);
}

void kvm_s390_pci_init_list(struct kvm *kvm)
{
	spin_lock_init(&kvm->arch.kzdev_list_lock);
	INIT_LIST_HEAD(&kvm->arch.kzdev_list);
}

void kvm_s390_pci_clear_list(struct kvm *kvm)
{
	/*
	 * This list should already be empty, either via vfio device closures
	 * or kvm fd cleanup.
	 */
	spin_lock(&kvm->arch.kzdev_list_lock);
	WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list));
	spin_unlock(&kvm->arch.kzdev_list_lock);
}

static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
{
	struct zpci_dev *zdev = NULL;
	struct kvm_zdev *kzdev;

	spin_lock(&kvm->arch.kzdev_list_lock);
	list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
		if (kzdev->zdev->fh == fh) {
			zdev = kzdev->zdev;
			break;
		}
	}
	spin_unlock(&kvm->arch.kzdev_list_lock);

	return zdev;
}

static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
				     struct kvm_s390_zpci_op *args)
{
	struct zpci_fib fib = {};
	bool hostflag;

	fib.fmt0.aibv = args->u.reg_aen.ibv;
	fib.fmt0.isc = args->u.reg_aen.isc;
	fib.fmt0.noi = args->u.reg_aen.noi;
	if (args->u.reg_aen.sb != 0) {
		fib.fmt0.aisb = args->u.reg_aen.sb;
		fib.fmt0.aisbo = args->u.reg_aen.sbo;
		fib.fmt0.sum = 1;
	} else {
		fib.fmt0.aisb = 0;
		fib.fmt0.aisbo = 0;
		fib.fmt0.sum = 0;
	}

	hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST);
	return kvm_s390_pci_aif_enable(zdev, &fib, hostflag);
}
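/*
 * The dispatcher below is reached via the KVM_S390_ZPCI_OP vm ioctl.  A
 * rough sketch of the userspace side (illustrative only, error handling
 * omitted):
 *
 *	struct kvm_s390_zpci_op op = {
 *		.fh = fh,                       // host function handle
 *		.op = KVM_S390_ZPCIOP_REG_AEN,
 *		.u.reg_aen.ibv = ibv_gpa,       // guest AIBV address
 *		.u.reg_aen.isc = isc,
 *		.u.reg_aen.noi = nvecs,
 *	};
 *	ioctl(vm_fd, KVM_S390_ZPCI_OP, &op);
 */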
int kvm_s390_pci_zpci_op(struct kvm *kvm,
			 struct kvm_s390_zpci_op *args)
{
	struct kvm_zdev *kzdev;
	struct zpci_dev *zdev;
	int r;

	zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
	if (!zdev)
		return -ENODEV;

	mutex_lock(&zdev->kzdev_lock);
	mutex_lock(&kvm->lock);

	kzdev = zdev->kzdev;
	if (!kzdev) {
		r = -ENODEV;
		goto out;
	}
	if (kzdev->kvm != kvm) {
		r = -EPERM;
		goto out;
	}

	switch (args->op) {
	case KVM_S390_ZPCIOP_REG_AEN:
		/* Fail on unknown flags */
		if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_pci_zpci_reg_aen(zdev, args);
		break;
	case KVM_S390_ZPCIOP_DEREG_AEN:
		r = kvm_s390_pci_aif_disable(zdev, false);
		break;
	default:
		r = -EINVAL;
	}

out:
	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);
	return r;
}

int __init kvm_s390_pci_init(void)
{
	zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
	zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;

	if (!kvm_s390_pci_interp_allowed())
		return 0;

	aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
	if (!aift)
		return -ENOMEM;

	spin_lock_init(&aift->gait_lock);
	mutex_init(&aift->aift_lock);

	return 0;
}

void kvm_s390_pci_exit(void)
{
	zpci_kvm_hook.kvm_register = NULL;
	zpci_kvm_hook.kvm_unregister = NULL;

	if (!kvm_s390_pci_interp_allowed())
		return;

	mutex_destroy(&aift->aift_lock);

	kfree(aift);
}
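/*
 * Module lifecycle note: kvm_s390_pci_exit() frees the aift wrapper, but
 * zpci_aipb and zpci_aif_sbv deliberately stay allocated because they
 * remain registered with firmware for the life of the host kernel;
 * zpci_reset_aipb() picks them back up if the kvm module is reloaded
 * with the same forwarding ISC.
 */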