1 /* 2 * VFIO PCI interrupt handling 3 * 4 * Copyright (C) 2012 Red Hat, Inc. All rights reserved. 5 * Author: Alex Williamson <alex.williamson@redhat.com> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 * 11 * Derived from original vfio: 12 * Copyright 2010 Cisco Systems, Inc. All rights reserved. 13 * Author: Tom Lyon, pugs@cisco.com 14 */ 15 16 #include <linux/device.h> 17 #include <linux/interrupt.h> 18 #include <linux/eventfd.h> 19 #include <linux/msi.h> 20 #include <linux/pci.h> 21 #include <linux/file.h> 22 #include <linux/poll.h> 23 #include <linux/vfio.h> 24 #include <linux/wait.h> 25 #include <linux/workqueue.h> 26 #include <linux/slab.h> 27 28 #include "vfio_pci_private.h" 29 30 /* 31 * IRQfd - generic 32 */ 33 struct virqfd { 34 struct vfio_pci_device *vdev; 35 struct eventfd_ctx *eventfd; 36 int (*handler)(struct vfio_pci_device *, void *); 37 void (*thread)(struct vfio_pci_device *, void *); 38 void *data; 39 struct work_struct inject; 40 wait_queue_t wait; 41 poll_table pt; 42 struct work_struct shutdown; 43 struct virqfd **pvirqfd; 44 }; 45 46 static struct workqueue_struct *vfio_irqfd_cleanup_wq; 47 48 int __init vfio_pci_virqfd_init(void) 49 { 50 vfio_irqfd_cleanup_wq = 51 create_singlethread_workqueue("vfio-irqfd-cleanup"); 52 if (!vfio_irqfd_cleanup_wq) 53 return -ENOMEM; 54 55 return 0; 56 } 57 58 void vfio_pci_virqfd_exit(void) 59 { 60 destroy_workqueue(vfio_irqfd_cleanup_wq); 61 } 62 63 static void virqfd_deactivate(struct virqfd *virqfd) 64 { 65 queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown); 66 } 67 68 static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) 69 { 70 struct virqfd *virqfd = container_of(wait, struct virqfd, wait); 71 unsigned long flags = (unsigned long)key; 72 73 if (flags & POLLIN) { 74 /* An event has been signaled, call function */ 75 if ((!virqfd->handler || 76 virqfd->handler(virqfd->vdev, virqfd->data)) && 77 virqfd->thread) 78 schedule_work(&virqfd->inject); 79 } 80 81 if (flags & POLLHUP) { 82 unsigned long flags; 83 spin_lock_irqsave(&virqfd->vdev->irqlock, flags); 84 85 /* 86 * The eventfd is closing, if the virqfd has not yet been 87 * queued for release, as determined by testing whether the 88 * vdev pointer to it is still valid, queue it now. As 89 * with kvm irqfds, we know we won't race against the virqfd 90 * going away because we hold wqh->lock to get here. 91 */ 92 if (*(virqfd->pvirqfd) == virqfd) { 93 *(virqfd->pvirqfd) = NULL; 94 virqfd_deactivate(virqfd); 95 } 96 97 spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags); 98 } 99 100 return 0; 101 } 102 103 static void virqfd_ptable_queue_proc(struct file *file, 104 wait_queue_head_t *wqh, poll_table *pt) 105 { 106 struct virqfd *virqfd = container_of(pt, struct virqfd, pt); 107 add_wait_queue(wqh, &virqfd->wait); 108 } 109 110 static void virqfd_shutdown(struct work_struct *work) 111 { 112 struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); 113 u64 cnt; 114 115 eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); 116 flush_work(&virqfd->inject); 117 eventfd_ctx_put(virqfd->eventfd); 118 119 kfree(virqfd); 120 } 121 122 static void virqfd_inject(struct work_struct *work) 123 { 124 struct virqfd *virqfd = container_of(work, struct virqfd, inject); 125 if (virqfd->thread) 126 virqfd->thread(virqfd->vdev, virqfd->data); 127 } 128 129 static int virqfd_enable(struct vfio_pci_device *vdev, 130 int (*handler)(struct vfio_pci_device *, void *), 131 void (*thread)(struct vfio_pci_device *, void *), 132 void *data, struct virqfd **pvirqfd, int fd) 133 { 134 struct fd irqfd; 135 struct eventfd_ctx *ctx; 136 struct virqfd *virqfd; 137 int ret = 0; 138 unsigned int events; 139 140 virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); 141 if (!virqfd) 142 return -ENOMEM; 143 144 virqfd->pvirqfd = pvirqfd; 145 virqfd->vdev = vdev; 146 virqfd->handler = handler; 147 virqfd->thread = thread; 148 virqfd->data = data; 149 150 INIT_WORK(&virqfd->shutdown, virqfd_shutdown); 151 INIT_WORK(&virqfd->inject, virqfd_inject); 152 153 irqfd = fdget(fd); 154 if (!irqfd.file) { 155 ret = -EBADF; 156 goto err_fd; 157 } 158 159 ctx = eventfd_ctx_fileget(irqfd.file); 160 if (IS_ERR(ctx)) { 161 ret = PTR_ERR(ctx); 162 goto err_ctx; 163 } 164 165 virqfd->eventfd = ctx; 166 167 /* 168 * virqfds can be released by closing the eventfd or directly 169 * through ioctl. These are both done through a workqueue, so 170 * we update the pointer to the virqfd under lock to avoid 171 * pushing multiple jobs to release the same virqfd. 172 */ 173 spin_lock_irq(&vdev->irqlock); 174 175 if (*pvirqfd) { 176 spin_unlock_irq(&vdev->irqlock); 177 ret = -EBUSY; 178 goto err_busy; 179 } 180 *pvirqfd = virqfd; 181 182 spin_unlock_irq(&vdev->irqlock); 183 184 /* 185 * Install our own custom wake-up handling so we are notified via 186 * a callback whenever someone signals the underlying eventfd. 187 */ 188 init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); 189 init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); 190 191 events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt); 192 193 /* 194 * Check if there was an event already pending on the eventfd 195 * before we registered and trigger it as if we didn't miss it. 196 */ 197 if (events & POLLIN) { 198 if ((!handler || handler(vdev, data)) && thread) 199 schedule_work(&virqfd->inject); 200 } 201 202 /* 203 * Do not drop the file until the irqfd is fully initialized, 204 * otherwise we might race against the POLLHUP. 205 */ 206 fdput(irqfd); 207 208 return 0; 209 err_busy: 210 eventfd_ctx_put(ctx); 211 err_ctx: 212 fdput(irqfd); 213 err_fd: 214 kfree(virqfd); 215 216 return ret; 217 } 218 219 static void virqfd_disable(struct vfio_pci_device *vdev, 220 struct virqfd **pvirqfd) 221 { 222 unsigned long flags; 223 224 spin_lock_irqsave(&vdev->irqlock, flags); 225 226 if (*pvirqfd) { 227 virqfd_deactivate(*pvirqfd); 228 *pvirqfd = NULL; 229 } 230 231 spin_unlock_irqrestore(&vdev->irqlock, flags); 232 233 /* 234 * Block until we know all outstanding shutdown jobs have completed. 235 * Even if we don't queue the job, flush the wq to be sure it's 236 * been released. 237 */ 238 flush_workqueue(vfio_irqfd_cleanup_wq); 239 } 240 241 /* 242 * INTx 243 */ 244 static void vfio_send_intx_eventfd(struct vfio_pci_device *vdev, void *unused) 245 { 246 if (likely(is_intx(vdev) && !vdev->virq_disabled)) 247 eventfd_signal(vdev->ctx[0].trigger, 1); 248 } 249 250 void vfio_pci_intx_mask(struct vfio_pci_device *vdev) 251 { 252 struct pci_dev *pdev = vdev->pdev; 253 unsigned long flags; 254 255 spin_lock_irqsave(&vdev->irqlock, flags); 256 257 /* 258 * Masking can come from interrupt, ioctl, or config space 259 * via INTx disable. The latter means this can get called 260 * even when not using intx delivery. In this case, just 261 * try to have the physical bit follow the virtual bit. 262 */ 263 if (unlikely(!is_intx(vdev))) { 264 if (vdev->pci_2_3) 265 pci_intx(pdev, 0); 266 } else if (!vdev->ctx[0].masked) { 267 /* 268 * Can't use check_and_mask here because we always want to 269 * mask, not just when something is pending. 270 */ 271 if (vdev->pci_2_3) 272 pci_intx(pdev, 0); 273 else 274 disable_irq_nosync(pdev->irq); 275 276 vdev->ctx[0].masked = true; 277 } 278 279 spin_unlock_irqrestore(&vdev->irqlock, flags); 280 } 281 282 /* 283 * If this is triggered by an eventfd, we can't call eventfd_signal 284 * or else we'll deadlock on the eventfd wait queue. Return >0 when 285 * a signal is necessary, which can then be handled via a work queue 286 * or directly depending on the caller. 287 */ 288 static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev, 289 void *unused) 290 { 291 struct pci_dev *pdev = vdev->pdev; 292 unsigned long flags; 293 int ret = 0; 294 295 spin_lock_irqsave(&vdev->irqlock, flags); 296 297 /* 298 * Unmasking comes from ioctl or config, so again, have the 299 * physical bit follow the virtual even when not using INTx. 300 */ 301 if (unlikely(!is_intx(vdev))) { 302 if (vdev->pci_2_3) 303 pci_intx(pdev, 1); 304 } else if (vdev->ctx[0].masked && !vdev->virq_disabled) { 305 /* 306 * A pending interrupt here would immediately trigger, 307 * but we can avoid that overhead by just re-sending 308 * the interrupt to the user. 309 */ 310 if (vdev->pci_2_3) { 311 if (!pci_check_and_unmask_intx(pdev)) 312 ret = 1; 313 } else 314 enable_irq(pdev->irq); 315 316 vdev->ctx[0].masked = (ret > 0); 317 } 318 319 spin_unlock_irqrestore(&vdev->irqlock, flags); 320 321 return ret; 322 } 323 324 void vfio_pci_intx_unmask(struct vfio_pci_device *vdev) 325 { 326 if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0) 327 vfio_send_intx_eventfd(vdev, NULL); 328 } 329 330 static irqreturn_t vfio_intx_handler(int irq, void *dev_id) 331 { 332 struct vfio_pci_device *vdev = dev_id; 333 unsigned long flags; 334 int ret = IRQ_NONE; 335 336 spin_lock_irqsave(&vdev->irqlock, flags); 337 338 if (!vdev->pci_2_3) { 339 disable_irq_nosync(vdev->pdev->irq); 340 vdev->ctx[0].masked = true; 341 ret = IRQ_HANDLED; 342 } else if (!vdev->ctx[0].masked && /* may be shared */ 343 pci_check_and_mask_intx(vdev->pdev)) { 344 vdev->ctx[0].masked = true; 345 ret = IRQ_HANDLED; 346 } 347 348 spin_unlock_irqrestore(&vdev->irqlock, flags); 349 350 if (ret == IRQ_HANDLED) 351 vfio_send_intx_eventfd(vdev, NULL); 352 353 return ret; 354 } 355 356 static int vfio_intx_enable(struct vfio_pci_device *vdev) 357 { 358 if (!is_irq_none(vdev)) 359 return -EINVAL; 360 361 if (!vdev->pdev->irq) 362 return -ENODEV; 363 364 vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); 365 if (!vdev->ctx) 366 return -ENOMEM; 367 368 vdev->num_ctx = 1; 369 370 /* 371 * If the virtual interrupt is masked, restore it. Devices 372 * supporting DisINTx can be masked at the hardware level 373 * here, non-PCI-2.3 devices will have to wait until the 374 * interrupt is enabled. 375 */ 376 vdev->ctx[0].masked = vdev->virq_disabled; 377 if (vdev->pci_2_3) 378 pci_intx(vdev->pdev, !vdev->ctx[0].masked); 379 380 vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; 381 382 return 0; 383 } 384 385 static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) 386 { 387 struct pci_dev *pdev = vdev->pdev; 388 unsigned long irqflags = IRQF_SHARED; 389 struct eventfd_ctx *trigger; 390 unsigned long flags; 391 int ret; 392 393 if (vdev->ctx[0].trigger) { 394 free_irq(pdev->irq, vdev); 395 kfree(vdev->ctx[0].name); 396 eventfd_ctx_put(vdev->ctx[0].trigger); 397 vdev->ctx[0].trigger = NULL; 398 } 399 400 if (fd < 0) /* Disable only */ 401 return 0; 402 403 vdev->ctx[0].name = kasprintf(GFP_KERNEL, "vfio-intx(%s)", 404 pci_name(pdev)); 405 if (!vdev->ctx[0].name) 406 return -ENOMEM; 407 408 trigger = eventfd_ctx_fdget(fd); 409 if (IS_ERR(trigger)) { 410 kfree(vdev->ctx[0].name); 411 return PTR_ERR(trigger); 412 } 413 414 vdev->ctx[0].trigger = trigger; 415 416 if (!vdev->pci_2_3) 417 irqflags = 0; 418 419 ret = request_irq(pdev->irq, vfio_intx_handler, 420 irqflags, vdev->ctx[0].name, vdev); 421 if (ret) { 422 vdev->ctx[0].trigger = NULL; 423 kfree(vdev->ctx[0].name); 424 eventfd_ctx_put(trigger); 425 return ret; 426 } 427 428 /* 429 * INTx disable will stick across the new irq setup, 430 * disable_irq won't. 431 */ 432 spin_lock_irqsave(&vdev->irqlock, flags); 433 if (!vdev->pci_2_3 && vdev->ctx[0].masked) 434 disable_irq_nosync(pdev->irq); 435 spin_unlock_irqrestore(&vdev->irqlock, flags); 436 437 return 0; 438 } 439 440 static void vfio_intx_disable(struct vfio_pci_device *vdev) 441 { 442 vfio_intx_set_signal(vdev, -1); 443 virqfd_disable(vdev, &vdev->ctx[0].unmask); 444 virqfd_disable(vdev, &vdev->ctx[0].mask); 445 vdev->irq_type = VFIO_PCI_NUM_IRQS; 446 vdev->num_ctx = 0; 447 kfree(vdev->ctx); 448 } 449 450 /* 451 * MSI/MSI-X 452 */ 453 static irqreturn_t vfio_msihandler(int irq, void *arg) 454 { 455 struct eventfd_ctx *trigger = arg; 456 457 eventfd_signal(trigger, 1); 458 return IRQ_HANDLED; 459 } 460 461 static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix) 462 { 463 struct pci_dev *pdev = vdev->pdev; 464 int ret; 465 466 if (!is_irq_none(vdev)) 467 return -EINVAL; 468 469 vdev->ctx = kzalloc(nvec * sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); 470 if (!vdev->ctx) 471 return -ENOMEM; 472 473 if (msix) { 474 int i; 475 476 vdev->msix = kzalloc(nvec * sizeof(struct msix_entry), 477 GFP_KERNEL); 478 if (!vdev->msix) { 479 kfree(vdev->ctx); 480 return -ENOMEM; 481 } 482 483 for (i = 0; i < nvec; i++) 484 vdev->msix[i].entry = i; 485 486 ret = pci_enable_msix_range(pdev, vdev->msix, 1, nvec); 487 if (ret < nvec) { 488 if (ret > 0) 489 pci_disable_msix(pdev); 490 kfree(vdev->msix); 491 kfree(vdev->ctx); 492 return ret; 493 } 494 } else { 495 ret = pci_enable_msi_range(pdev, 1, nvec); 496 if (ret < nvec) { 497 if (ret > 0) 498 pci_disable_msi(pdev); 499 kfree(vdev->ctx); 500 return ret; 501 } 502 } 503 504 vdev->num_ctx = nvec; 505 vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX : 506 VFIO_PCI_MSI_IRQ_INDEX; 507 508 if (!msix) { 509 /* 510 * Compute the virtual hardware field for max msi vectors - 511 * it is the log base 2 of the number of vectors. 512 */ 513 vdev->msi_qmax = fls(nvec * 2 - 1) - 1; 514 } 515 516 return 0; 517 } 518 519 static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, 520 int vector, int fd, bool msix) 521 { 522 struct pci_dev *pdev = vdev->pdev; 523 int irq = msix ? vdev->msix[vector].vector : pdev->irq + vector; 524 char *name = msix ? "vfio-msix" : "vfio-msi"; 525 struct eventfd_ctx *trigger; 526 int ret; 527 528 if (vector >= vdev->num_ctx) 529 return -EINVAL; 530 531 if (vdev->ctx[vector].trigger) { 532 free_irq(irq, vdev->ctx[vector].trigger); 533 kfree(vdev->ctx[vector].name); 534 eventfd_ctx_put(vdev->ctx[vector].trigger); 535 vdev->ctx[vector].trigger = NULL; 536 } 537 538 if (fd < 0) 539 return 0; 540 541 vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "%s[%d](%s)", 542 name, vector, pci_name(pdev)); 543 if (!vdev->ctx[vector].name) 544 return -ENOMEM; 545 546 trigger = eventfd_ctx_fdget(fd); 547 if (IS_ERR(trigger)) { 548 kfree(vdev->ctx[vector].name); 549 return PTR_ERR(trigger); 550 } 551 552 /* 553 * The MSIx vector table resides in device memory which may be cleared 554 * via backdoor resets. We don't allow direct access to the vector 555 * table so even if a userspace driver attempts to save/restore around 556 * such a reset it would be unsuccessful. To avoid this, restore the 557 * cached value of the message prior to enabling. 558 */ 559 if (msix) { 560 struct msi_msg msg; 561 562 get_cached_msi_msg(irq, &msg); 563 pci_write_msi_msg(irq, &msg); 564 } 565 566 ret = request_irq(irq, vfio_msihandler, 0, 567 vdev->ctx[vector].name, trigger); 568 if (ret) { 569 kfree(vdev->ctx[vector].name); 570 eventfd_ctx_put(trigger); 571 return ret; 572 } 573 574 vdev->ctx[vector].trigger = trigger; 575 576 return 0; 577 } 578 579 static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start, 580 unsigned count, int32_t *fds, bool msix) 581 { 582 int i, j, ret = 0; 583 584 if (start + count > vdev->num_ctx) 585 return -EINVAL; 586 587 for (i = 0, j = start; i < count && !ret; i++, j++) { 588 int fd = fds ? fds[i] : -1; 589 ret = vfio_msi_set_vector_signal(vdev, j, fd, msix); 590 } 591 592 if (ret) { 593 for (--j; j >= start; j--) 594 vfio_msi_set_vector_signal(vdev, j, -1, msix); 595 } 596 597 return ret; 598 } 599 600 static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) 601 { 602 struct pci_dev *pdev = vdev->pdev; 603 int i; 604 605 vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); 606 607 for (i = 0; i < vdev->num_ctx; i++) { 608 virqfd_disable(vdev, &vdev->ctx[i].unmask); 609 virqfd_disable(vdev, &vdev->ctx[i].mask); 610 } 611 612 if (msix) { 613 pci_disable_msix(vdev->pdev); 614 kfree(vdev->msix); 615 } else 616 pci_disable_msi(pdev); 617 618 vdev->irq_type = VFIO_PCI_NUM_IRQS; 619 vdev->num_ctx = 0; 620 kfree(vdev->ctx); 621 } 622 623 /* 624 * IOCTL support 625 */ 626 static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, 627 unsigned index, unsigned start, 628 unsigned count, uint32_t flags, void *data) 629 { 630 if (!is_intx(vdev) || start != 0 || count != 1) 631 return -EINVAL; 632 633 if (flags & VFIO_IRQ_SET_DATA_NONE) { 634 vfio_pci_intx_unmask(vdev); 635 } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { 636 uint8_t unmask = *(uint8_t *)data; 637 if (unmask) 638 vfio_pci_intx_unmask(vdev); 639 } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { 640 int32_t fd = *(int32_t *)data; 641 if (fd >= 0) 642 return virqfd_enable(vdev, vfio_pci_intx_unmask_handler, 643 vfio_send_intx_eventfd, NULL, 644 &vdev->ctx[0].unmask, fd); 645 646 virqfd_disable(vdev, &vdev->ctx[0].unmask); 647 } 648 649 return 0; 650 } 651 652 static int vfio_pci_set_intx_mask(struct vfio_pci_device *vdev, 653 unsigned index, unsigned start, 654 unsigned count, uint32_t flags, void *data) 655 { 656 if (!is_intx(vdev) || start != 0 || count != 1) 657 return -EINVAL; 658 659 if (flags & VFIO_IRQ_SET_DATA_NONE) { 660 vfio_pci_intx_mask(vdev); 661 } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { 662 uint8_t mask = *(uint8_t *)data; 663 if (mask) 664 vfio_pci_intx_mask(vdev); 665 } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { 666 return -ENOTTY; /* XXX implement me */ 667 } 668 669 return 0; 670 } 671 672 static int vfio_pci_set_intx_trigger(struct vfio_pci_device *vdev, 673 unsigned index, unsigned start, 674 unsigned count, uint32_t flags, void *data) 675 { 676 if (is_intx(vdev) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) { 677 vfio_intx_disable(vdev); 678 return 0; 679 } 680 681 if (!(is_intx(vdev) || is_irq_none(vdev)) || start != 0 || count != 1) 682 return -EINVAL; 683 684 if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { 685 int32_t fd = *(int32_t *)data; 686 int ret; 687 688 if (is_intx(vdev)) 689 return vfio_intx_set_signal(vdev, fd); 690 691 ret = vfio_intx_enable(vdev); 692 if (ret) 693 return ret; 694 695 ret = vfio_intx_set_signal(vdev, fd); 696 if (ret) 697 vfio_intx_disable(vdev); 698 699 return ret; 700 } 701 702 if (!is_intx(vdev)) 703 return -EINVAL; 704 705 if (flags & VFIO_IRQ_SET_DATA_NONE) { 706 vfio_send_intx_eventfd(vdev, NULL); 707 } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { 708 uint8_t trigger = *(uint8_t *)data; 709 if (trigger) 710 vfio_send_intx_eventfd(vdev, NULL); 711 } 712 return 0; 713 } 714 715 static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev, 716 unsigned index, unsigned start, 717 unsigned count, uint32_t flags, void *data) 718 { 719 int i; 720 bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false; 721 722 if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) { 723 vfio_msi_disable(vdev, msix); 724 return 0; 725 } 726 727 if (!(irq_is(vdev, index) || is_irq_none(vdev))) 728 return -EINVAL; 729 730 if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { 731 int32_t *fds = data; 732 int ret; 733 734 if (vdev->irq_type == index) 735 return vfio_msi_set_block(vdev, start, count, 736 fds, msix); 737 738 ret = vfio_msi_enable(vdev, start + count, msix); 739 if (ret) 740 return ret; 741 742 ret = vfio_msi_set_block(vdev, start, count, fds, msix); 743 if (ret) 744 vfio_msi_disable(vdev, msix); 745 746 return ret; 747 } 748 749 if (!irq_is(vdev, index) || start + count > vdev->num_ctx) 750 return -EINVAL; 751 752 for (i = start; i < start + count; i++) { 753 if (!vdev->ctx[i].trigger) 754 continue; 755 if (flags & VFIO_IRQ_SET_DATA_NONE) { 756 eventfd_signal(vdev->ctx[i].trigger, 1); 757 } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { 758 uint8_t *bools = data; 759 if (bools[i - start]) 760 eventfd_signal(vdev->ctx[i].trigger, 1); 761 } 762 } 763 return 0; 764 } 765 766 static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx, 767 uint32_t flags, void *data) 768 { 769 int32_t fd = *(int32_t *)data; 770 771 if (!(flags & VFIO_IRQ_SET_DATA_TYPE_MASK)) 772 return -EINVAL; 773 774 /* DATA_NONE/DATA_BOOL enables loopback testing */ 775 if (flags & VFIO_IRQ_SET_DATA_NONE) { 776 if (*ctx) 777 eventfd_signal(*ctx, 1); 778 return 0; 779 } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { 780 uint8_t trigger = *(uint8_t *)data; 781 if (trigger && *ctx) 782 eventfd_signal(*ctx, 1); 783 return 0; 784 } 785 786 /* Handle SET_DATA_EVENTFD */ 787 if (fd == -1) { 788 if (*ctx) 789 eventfd_ctx_put(*ctx); 790 *ctx = NULL; 791 return 0; 792 } else if (fd >= 0) { 793 struct eventfd_ctx *efdctx; 794 efdctx = eventfd_ctx_fdget(fd); 795 if (IS_ERR(efdctx)) 796 return PTR_ERR(efdctx); 797 if (*ctx) 798 eventfd_ctx_put(*ctx); 799 *ctx = efdctx; 800 return 0; 801 } else 802 return -EINVAL; 803 } 804 805 static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev, 806 unsigned index, unsigned start, 807 unsigned count, uint32_t flags, void *data) 808 { 809 if (index != VFIO_PCI_ERR_IRQ_INDEX) 810 return -EINVAL; 811 812 /* 813 * We should sanitize start & count, but that wasn't caught 814 * originally, so this IRQ index must forever ignore them :-( 815 */ 816 817 return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, flags, data); 818 } 819 820 static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev, 821 unsigned index, unsigned start, 822 unsigned count, uint32_t flags, void *data) 823 { 824 if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count != 1) 825 return -EINVAL; 826 827 return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, flags, data); 828 } 829 830 int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags, 831 unsigned index, unsigned start, unsigned count, 832 void *data) 833 { 834 int (*func)(struct vfio_pci_device *vdev, unsigned index, 835 unsigned start, unsigned count, uint32_t flags, 836 void *data) = NULL; 837 838 switch (index) { 839 case VFIO_PCI_INTX_IRQ_INDEX: 840 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { 841 case VFIO_IRQ_SET_ACTION_MASK: 842 func = vfio_pci_set_intx_mask; 843 break; 844 case VFIO_IRQ_SET_ACTION_UNMASK: 845 func = vfio_pci_set_intx_unmask; 846 break; 847 case VFIO_IRQ_SET_ACTION_TRIGGER: 848 func = vfio_pci_set_intx_trigger; 849 break; 850 } 851 break; 852 case VFIO_PCI_MSI_IRQ_INDEX: 853 case VFIO_PCI_MSIX_IRQ_INDEX: 854 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { 855 case VFIO_IRQ_SET_ACTION_MASK: 856 case VFIO_IRQ_SET_ACTION_UNMASK: 857 /* XXX Need masking support exported */ 858 break; 859 case VFIO_IRQ_SET_ACTION_TRIGGER: 860 func = vfio_pci_set_msi_trigger; 861 break; 862 } 863 break; 864 case VFIO_PCI_ERR_IRQ_INDEX: 865 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { 866 case VFIO_IRQ_SET_ACTION_TRIGGER: 867 if (pci_is_pcie(vdev->pdev)) 868 func = vfio_pci_set_err_trigger; 869 break; 870 } 871 break; 872 case VFIO_PCI_REQ_IRQ_INDEX: 873 switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { 874 case VFIO_IRQ_SET_ACTION_TRIGGER: 875 func = vfio_pci_set_req_trigger; 876 break; 877 } 878 break; 879 } 880 881 if (!func) 882 return -ENOTTY; 883 884 return func(vdev, index, start, count, flags, data); 885 } 886