// SPDX-License-Identifier: GPL-2.0-only
/*
 * eventfd support for mshv
 *
 * Heavily inspired by the KVM implementation of irqfd/ioeventfd. The basic
 * framework code is taken from the KVM implementation.
 *
 * All credit to the KVM developers.
 */

#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/eventfd.h>

#if IS_ENABLED(CONFIG_X86_64)
#include <asm/apic.h>
#endif
#include <asm/mshyperv.h>

#include "mshv_eventfd.h"
#include "mshv.h"
#include "mshv_root.h"

static struct workqueue_struct *irqfd_cleanup_wq;

void mshv_register_irq_ack_notifier(struct mshv_partition *partition,
				    struct mshv_irq_ack_notifier *mian)
{
	mutex_lock(&partition->pt_irq_lock);
	hlist_add_head_rcu(&mian->link, &partition->irq_ack_notifier_list);
	mutex_unlock(&partition->pt_irq_lock);
}

void mshv_unregister_irq_ack_notifier(struct mshv_partition *partition,
				      struct mshv_irq_ack_notifier *mian)
{
	mutex_lock(&partition->pt_irq_lock);
	hlist_del_init_rcu(&mian->link);
	mutex_unlock(&partition->pt_irq_lock);
	synchronize_rcu();
}

bool mshv_notify_acked_gsi(struct mshv_partition *partition, int gsi)
{
	struct mshv_irq_ack_notifier *mian;
	bool acked = false;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mian, &partition->irq_ack_notifier_list,
				 link) {
		if (mian->irq_ack_gsi == gsi) {
			mian->irq_acked(mian);
			acked = true;
		}
	}
	rcu_read_unlock();

	return acked;
}

#if IS_ENABLED(CONFIG_ARM64)
static inline bool hv_should_clear_interrupt(enum hv_interrupt_type type)
{
	return false;
}
#elif IS_ENABLED(CONFIG_X86_64)
static inline bool hv_should_clear_interrupt(enum hv_interrupt_type type)
{
	return type == HV_X64_INTERRUPT_TYPE_EXTINT;
}
#endif

static void mshv_irqfd_resampler_ack(struct mshv_irq_ack_notifier *mian)
{
	struct mshv_irqfd_resampler *resampler;
	struct mshv_partition *partition;
	struct mshv_irqfd *irqfd;
	int idx;

	resampler = container_of(mian, struct mshv_irqfd_resampler,
				 rsmplr_notifier);
	partition = resampler->rsmplr_partn;

	idx = srcu_read_lock(&partition->pt_irq_srcu);

	hlist_for_each_entry_rcu(irqfd, &resampler->rsmplr_irqfd_list,
				 irqfd_resampler_hnode) {
		if (hv_should_clear_interrupt(irqfd->irqfd_lapic_irq.lapic_control.interrupt_type))
			hv_call_clear_virtual_interrupt(partition->pt_id);

		eventfd_signal(irqfd->irqfd_resamplefd);
	}

	srcu_read_unlock(&partition->pt_irq_srcu, idx);
}

#if IS_ENABLED(CONFIG_X86_64)
static bool
mshv_vp_irq_vector_injected(union hv_vp_register_page_interrupt_vectors iv,
			    u32 vector)
{
	int i;

	for (i = 0; i < iv.vector_count; i++) {
		if (iv.vector[i] == vector)
			return true;
	}

	return false;
}

static int mshv_vp_irq_try_set_vector(struct mshv_vp *vp, u32 vector)
{
	union hv_vp_register_page_interrupt_vectors iv, new_iv;

	iv = vp->vp_register_page->interrupt_vectors;
	new_iv = iv;

	if (mshv_vp_irq_vector_injected(iv, vector))
		return 0;

	if (iv.vector_count >= HV_VP_REGISTER_PAGE_MAX_VECTOR_COUNT)
		return -ENOSPC;

	new_iv.vector[new_iv.vector_count++] = vector;

	if (cmpxchg(&vp->vp_register_page->interrupt_vectors.as_uint64,
		    iv.as_uint64, new_iv.as_uint64) != iv.as_uint64)
		return -EAGAIN;

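	/*
	 * cmpxchg() succeeded: the updated vector list is now published in
	 * the shared register page for the hypervisor to pick up on the
	 * next dispatch of this VP.
	 */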
	return 0;
}

static int mshv_vp_irq_set_vector(struct mshv_vp *vp, u32 vector)
{
	int ret;

	do {
		ret = mshv_vp_irq_try_set_vector(vp, vector);
	} while (ret == -EAGAIN && !need_resched());

	return ret;
}

/*
 * Try to raise an irq for the guest via the shared vector array. The
 * hypervisor does the actual injection of the interrupt.
 */
static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd)
{
	struct mshv_partition *partition = irqfd->irqfd_partn;
	struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq;
	struct mshv_vp *vp;

	if (!(ms_hyperv.ext_features &
	      HV_VP_DISPATCH_INTERRUPT_INJECTION_AVAILABLE))
		return -EOPNOTSUPP;

	if (hv_scheduler_type != HV_SCHEDULER_TYPE_ROOT)
		return -EOPNOTSUPP;

#if IS_ENABLED(CONFIG_X86)
	if (irq->lapic_control.logical_dest_mode)
		return -EOPNOTSUPP;
#endif

	vp = partition->pt_vp_array[irq->lapic_apic_id];

	if (!vp->vp_register_page)
		return -EOPNOTSUPP;

	if (mshv_vp_irq_set_vector(vp, irq->lapic_vector))
		return -EINVAL;

	if (vp->run.flags.root_sched_dispatched &&
	    vp->vp_register_page->interrupt_vectors.as_uint64)
		return -EBUSY;

	wake_up(&vp->run.vp_suspend_queue);

	return 0;
}
#else /* CONFIG_X86_64 */
static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd)
{
	return -EOPNOTSUPP;
}
#endif

static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd)
{
	struct mshv_partition *partition = irqfd->irqfd_partn;
	struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq;
	unsigned int seq;
	int idx;

#if IS_ENABLED(CONFIG_X86)
	WARN_ON(irqfd->irqfd_resampler &&
		!irq->lapic_control.level_triggered);
#endif

	idx = srcu_read_lock(&partition->pt_irq_srcu);
	if (irqfd->irqfd_girq_ent.guest_irq_num) {
		if (!irqfd->irqfd_girq_ent.girq_entry_valid) {
			srcu_read_unlock(&partition->pt_irq_srcu, idx);
			return;
		}

		do {
			seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
		} while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));
	}

	hv_call_assert_virtual_interrupt(irqfd->irqfd_partn->pt_id,
					 irq->lapic_vector, irq->lapic_apic_id,
					 irq->lapic_control);
	srcu_read_unlock(&partition->pt_irq_srcu, idx);
}

static void mshv_irqfd_resampler_shutdown(struct mshv_irqfd *irqfd)
{
	struct mshv_irqfd_resampler *rp = irqfd->irqfd_resampler;
	struct mshv_partition *pt = rp->rsmplr_partn;

	mutex_lock(&pt->irqfds_resampler_lock);

	hlist_del_rcu(&irqfd->irqfd_resampler_hnode);
	synchronize_srcu(&pt->pt_irq_srcu);

	if (hlist_empty(&rp->rsmplr_irqfd_list)) {
		hlist_del(&rp->rsmplr_hnode);
		mshv_unregister_irq_ack_notifier(pt, &rp->rsmplr_notifier);
		kfree(rp);
	}

	mutex_unlock(&pt->irqfds_resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void mshv_irqfd_shutdown(struct work_struct *work)
{
	struct mshv_irqfd *irqfd =
		container_of(work, struct mshv_irqfd, irqfd_shutdown);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
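	 * remove_wait_queue() serializes on wqh->lock, so it cannot race
	 * with a concurrent mshv_irqfd_wakeup().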
	 */
	remove_wait_queue(irqfd->irqfd_wqh, &irqfd->irqfd_wait);

	if (irqfd->irqfd_resampler) {
		mshv_irqfd_resampler_shutdown(irqfd);
		eventfd_ctx_put(irqfd->irqfd_resamplefd);
	}

	/*
	 * It is now safe to release the object's resources
	 */
	eventfd_ctx_put(irqfd->irqfd_eventfd_ctx);
	kfree(irqfd);
}

/* assumes partition->pt_irqfds_lock is held */
static bool mshv_irqfd_is_active(struct mshv_irqfd *irqfd)
{
	return !hlist_unhashed(&irqfd->irqfd_hnode);
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes partition->pt_irqfds_lock is held
 */
static void mshv_irqfd_deactivate(struct mshv_irqfd *irqfd)
{
	if (!mshv_irqfd_is_active(irqfd))
		return;

	hlist_del(&irqfd->irqfd_hnode);

	queue_work(irqfd_cleanup_wq, &irqfd->irqfd_shutdown);
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode,
			     int sync, void *key)
{
	struct mshv_irqfd *irqfd = container_of(wait, struct mshv_irqfd,
						irqfd_wait);
	unsigned long flags = (unsigned long)key;
	int idx;
	unsigned int seq;
	struct mshv_partition *pt = irqfd->irqfd_partn;
	int ret = 0;

	if (flags & POLLIN) {
		u64 cnt;

		eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt);
		idx = srcu_read_lock(&pt->pt_irq_srcu);
		do {
			seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc);
		} while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq));

		/* An event has been signaled, raise an interrupt */
		ret = mshv_try_assert_irq_fast(irqfd);
		if (ret)
			mshv_assert_irq_slow(irqfd);

		srcu_read_unlock(&pt->pt_irq_srcu, idx);

		ret = 1;
	}

	if (flags & POLLHUP) {
		/* The eventfd is closing, detach from the partition */
		unsigned long flags;

		spin_lock_irqsave(&pt->pt_irqfds_lock, flags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the pt_irqfds_lock since the item is
		 * deactivated from the mshv side before it is unhooked from
		 * the wait-queue. If it is already deactivated, we can
		 * simply return knowing the other side will clean up for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (mshv_irqfd_is_active(irqfd))
			mshv_irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&pt->pt_irqfds_lock, flags);
	}

	return ret;
}

/* Must be called under pt_irqfds_lock */
static void mshv_irqfd_update(struct mshv_partition *pt,
			      struct mshv_irqfd *irqfd)
{
	write_seqcount_begin(&irqfd->irqfd_irqe_sc);
	irqfd->irqfd_girq_ent = mshv_ret_girq_entry(pt,
						    irqfd->irqfd_irqnum);
	mshv_copy_girq_info(&irqfd->irqfd_girq_ent, &irqfd->irqfd_lapic_irq);
	write_seqcount_end(&irqfd->irqfd_irqe_sc);
}

void mshv_irqfd_routing_update(struct mshv_partition *pt)
{
	struct mshv_irqfd *irqfd;

	spin_lock_irq(&pt->pt_irqfds_lock);
	hlist_for_each_entry(irqfd, &pt->pt_irqfds_list, irqfd_hnode)
		mshv_irqfd_update(pt, irqfd);
	spin_unlock_irq(&pt->pt_irqfds_lock);
}

static void mshv_irqfd_queue_proc(struct file *file, wait_queue_head_t *wqh,
				  poll_table *polltbl)
{
	struct mshv_irqfd *irqfd =
		container_of(polltbl, struct mshv_irqfd, irqfd_polltbl);

	irqfd->irqfd_wqh = wqh;

	/*
	 * TODO: Ensure there isn't already an exclusive, priority waiter, e.g.
	 * that the irqfd isn't already bound to another partition. Only the
	 * first exclusive waiter encountered will be notified, and
	 * add_wait_queue_priority() doesn't enforce exclusivity.
	 */
	irqfd->irqfd_wait.flags |= WQ_FLAG_EXCLUSIVE;
	add_wait_queue_priority(wqh, &irqfd->irqfd_wait);
}

static int mshv_irqfd_assign(struct mshv_partition *pt,
			     struct mshv_user_irqfd *args)
{
	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
	struct mshv_irqfd *irqfd, *tmp;
	unsigned int events;
	int ret;
	int idx;

	CLASS(fd, f)(args->fd);

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
	if (!irqfd)
		return -ENOMEM;

	irqfd->irqfd_partn = pt;
	irqfd->irqfd_irqnum = args->gsi;
	INIT_WORK(&irqfd->irqfd_shutdown, mshv_irqfd_shutdown);
	seqcount_spinlock_init(&irqfd->irqfd_irqe_sc, &pt->pt_irqfds_lock);

	if (fd_empty(f)) {
		ret = -EBADF;
		goto out;
	}

	eventfd = eventfd_ctx_fileget(fd_file(f));
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->irqfd_eventfd_ctx = eventfd;

	if (args->flags & BIT(MSHV_IRQFD_BIT_RESAMPLE)) {
		struct mshv_irqfd_resampler *rp;

		resamplefd = eventfd_ctx_fdget(args->resamplefd);
		if (IS_ERR(resamplefd)) {
			ret = PTR_ERR(resamplefd);
			goto fail;
		}

		irqfd->irqfd_resamplefd = resamplefd;

		mutex_lock(&pt->irqfds_resampler_lock);

		hlist_for_each_entry(rp, &pt->irqfds_resampler_list,
				     rsmplr_hnode) {
			if (rp->rsmplr_notifier.irq_ack_gsi ==
			    irqfd->irqfd_irqnum) {
				irqfd->irqfd_resampler = rp;
				break;
			}
		}

		if (!irqfd->irqfd_resampler) {
			rp = kzalloc(sizeof(*rp), GFP_KERNEL_ACCOUNT);
			if (!rp) {
				ret = -ENOMEM;
				mutex_unlock(&pt->irqfds_resampler_lock);
				goto fail;
			}

			rp->rsmplr_partn = pt;
			INIT_HLIST_HEAD(&rp->rsmplr_irqfd_list);
			rp->rsmplr_notifier.irq_ack_gsi = irqfd->irqfd_irqnum;
			rp->rsmplr_notifier.irq_acked =
					mshv_irqfd_resampler_ack;

			hlist_add_head(&rp->rsmplr_hnode,
				       &pt->irqfds_resampler_list);
			mshv_register_irq_ack_notifier(pt,
						       &rp->rsmplr_notifier);
			irqfd->irqfd_resampler = rp;
		}

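		/*
		 * Link this irqfd into the (new or existing) resampler's
		 * list so that a guest ACK on this GSI re-signals
		 * irqfd_resamplefd.
		 */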
		hlist_add_head_rcu(&irqfd->irqfd_resampler_hnode,
				   &irqfd->irqfd_resampler->rsmplr_irqfd_list);

		mutex_unlock(&pt->irqfds_resampler_lock);
	}

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->irqfd_wait, mshv_irqfd_wakeup);
	init_poll_funcptr(&irqfd->irqfd_polltbl, mshv_irqfd_queue_proc);

	spin_lock_irq(&pt->pt_irqfds_lock);
#if IS_ENABLED(CONFIG_X86)
	if (args->flags & BIT(MSHV_IRQFD_BIT_RESAMPLE) &&
	    !irqfd->irqfd_lapic_irq.lapic_control.level_triggered) {
		/*
		 * A resample fd is only valid for level-triggered
		 * interrupts; otherwise fail.
		 */
		spin_unlock_irq(&pt->pt_irqfds_lock);
		ret = -EINVAL;
		goto fail;
	}
#endif
	ret = 0;
	hlist_for_each_entry(tmp, &pt->pt_irqfds_list, irqfd_hnode) {
		if (irqfd->irqfd_eventfd_ctx != tmp->irqfd_eventfd_ctx)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&pt->pt_irqfds_lock);
		goto fail;
	}

	idx = srcu_read_lock(&pt->pt_irq_srcu);
	mshv_irqfd_update(pt, irqfd);
	hlist_add_head(&irqfd->irqfd_hnode, &pt->pt_irqfds_list);
	spin_unlock_irq(&pt->pt_irqfds_lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	events = vfs_poll(fd_file(f), &irqfd->irqfd_polltbl);

	if (events & POLLIN)
		mshv_assert_irq_slow(irqfd);

	srcu_read_unlock(&pt->pt_irq_srcu, idx);
	return 0;

fail:
	if (irqfd->irqfd_resampler)
		mshv_irqfd_resampler_shutdown(irqfd);

	if (resamplefd && !IS_ERR(resamplefd))
		eventfd_ctx_put(resamplefd);

	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

out:
	kfree(irqfd);
	return ret;
}

/*
 * Shut down any irqfds that match fd+gsi
 */
static int mshv_irqfd_deassign(struct mshv_partition *pt,
			       struct mshv_user_irqfd *args)
{
	struct mshv_irqfd *irqfd;
	struct hlist_node *n;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	hlist_for_each_entry_safe(irqfd, n, &pt->pt_irqfds_list,
				  irqfd_hnode) {
		if (irqfd->irqfd_eventfd_ctx == eventfd &&
		    irqfd->irqfd_irqnum == args->gsi)
			mshv_irqfd_deactivate(irqfd);
	}

	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int mshv_set_unset_irqfd(struct mshv_partition *pt,
			 struct mshv_user_irqfd *args)
{
	if (args->flags & ~MSHV_IRQFD_FLAGS_MASK)
		return -EINVAL;

	if (args->flags & BIT(MSHV_IRQFD_BIT_DEASSIGN))
		return mshv_irqfd_deassign(pt, args);

	return mshv_irqfd_assign(pt, args);
}

/*
 * This function is called as the mshv VM fd is being released.
 * Shut down all irqfds that still remain open.
 */
static void mshv_irqfd_release(struct mshv_partition *pt)
{
	struct mshv_irqfd *irqfd;
	struct hlist_node *n;

	spin_lock_irq(&pt->pt_irqfds_lock);

	hlist_for_each_entry_safe(irqfd, n, &pt->pt_irqfds_list, irqfd_hnode)
		mshv_irqfd_deactivate(irqfd);

	spin_unlock_irq(&pt->pt_irqfds_lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a mshv_partition* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);
}

int mshv_irqfd_wq_init(void)
{
	irqfd_cleanup_wq = alloc_workqueue("mshv-irqfd-cleanup", WQ_PERCPU, 0);
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void mshv_irqfd_wq_cleanup(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate an MMIO memory write to an eventfd signal.
 *
 * Userspace can register an MMIO address with an eventfd to receive a
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

static void ioeventfd_release(struct mshv_ioeventfd *p, u64 partition_id)
{
	if (p->iovntfd_doorbell_id > 0)
		mshv_unregister_doorbell(partition_id, p->iovntfd_doorbell_id);
	eventfd_ctx_put(p->iovntfd_eventfd);
	kfree(p);
}

/* MMIO writes trigger an event if the addr/val match */
static void ioeventfd_mmio_write(int doorbell_id, void *data)
{
	struct mshv_partition *partition = (struct mshv_partition *)data;
	struct mshv_ioeventfd *p;

	rcu_read_lock();
	hlist_for_each_entry_rcu(p, &partition->ioeventfds_list, iovntfd_hnode)
		if (p->iovntfd_doorbell_id == doorbell_id) {
			eventfd_signal(p->iovntfd_eventfd);
			break;
		}

	rcu_read_unlock();
}

static bool ioeventfd_check_collision(struct mshv_partition *pt,
				      struct mshv_ioeventfd *p)
	__must_hold(&pt->mutex)
{
	struct mshv_ioeventfd *_p;

	hlist_for_each_entry(_p, &pt->ioeventfds_list, iovntfd_hnode)
		if (_p->iovntfd_addr == p->iovntfd_addr &&
		    _p->iovntfd_length == p->iovntfd_length &&
		    (_p->iovntfd_wildcard || p->iovntfd_wildcard ||
		     _p->iovntfd_datamatch == p->iovntfd_datamatch))
			return true;

	return false;
}

static int mshv_assign_ioeventfd(struct mshv_partition *pt,
				 struct mshv_user_ioeventfd *args)
	__must_hold(&pt->mutex)
{
	struct mshv_ioeventfd *p;
	struct eventfd_ctx *eventfd;
	u64 doorbell_flags = 0;
	int ret;

	/* This mutex is currently protecting ioeventfd.items list */
	WARN_ON_ONCE(!mutex_is_locked(&pt->pt_mutex));

	if (args->flags & BIT(MSHV_IOEVENTFD_BIT_PIO))
		return -EOPNOTSUPP;

	/* must be natural-word sized */
	switch (args->len) {
	case 0:
		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_ANY;
		break;
	case 1:
		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_BYTE;
		break;
	case 2:
		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_WORD;
		break;
	case 4:
		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_DWORD;
		break;
	case 8:
		doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_QWORD;
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~MSHV_IOEVENTFD_FLAGS_MASK)
		return -EINVAL;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	p->iovntfd_addr = args->addr;
	p->iovntfd_length = args->len;
	p->iovntfd_eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & BIT(MSHV_IOEVENTFD_BIT_DATAMATCH)) {
		p->iovntfd_datamatch = args->datamatch;
	} else {
		p->iovntfd_wildcard = true;
		doorbell_flags |= HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE;
	}

	if (ioeventfd_check_collision(pt, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	ret = mshv_register_doorbell(pt->pt_id, ioeventfd_mmio_write,
				     (void *)pt, p->iovntfd_addr,
				     p->iovntfd_datamatch, doorbell_flags);
	if (ret < 0)
		goto unlock_fail;

	p->iovntfd_doorbell_id = ret;

	hlist_add_head_rcu(&p->iovntfd_hnode, &pt->ioeventfds_list);

	return 0;

unlock_fail:
	kfree(p);

fail:
	eventfd_ctx_put(eventfd);

	return ret;
}

static int mshv_deassign_ioeventfd(struct mshv_partition *pt,
				   struct mshv_user_ioeventfd *args)
	__must_hold(&pt->mutex)
{
	struct mshv_ioeventfd *p;
	struct eventfd_ctx *eventfd;
	struct hlist_node *n;
	int ret = -ENOENT;

	/* This mutex is currently protecting ioeventfd.items list */
	WARN_ON_ONCE(!mutex_is_locked(&pt->pt_mutex));

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	hlist_for_each_entry_safe(p, n, &pt->ioeventfds_list, iovntfd_hnode) {
		bool wildcard = !(args->flags & BIT(MSHV_IOEVENTFD_BIT_DATAMATCH));

		if (p->iovntfd_eventfd != eventfd ||
		    p->iovntfd_addr != args->addr ||
		    p->iovntfd_length != args->len ||
		    p->iovntfd_wildcard != wildcard)
			continue;

		if (!p->iovntfd_wildcard &&
		    p->iovntfd_datamatch != args->datamatch)
			continue;

		hlist_del_rcu(&p->iovntfd_hnode);
		synchronize_rcu();
		ioeventfd_release(p, pt->pt_id);
		ret = 0;
		break;
	}

	eventfd_ctx_put(eventfd);

	return ret;
}

int mshv_set_unset_ioeventfd(struct mshv_partition *pt,
			     struct mshv_user_ioeventfd *args)
	__must_hold(&pt->mutex)
{
	if ((args->flags & ~MSHV_IOEVENTFD_FLAGS_MASK) ||
	    mshv_field_nonzero(*args, rsvd))
		return -EINVAL;

	/* PIO not yet implemented */
	if (args->flags & BIT(MSHV_IOEVENTFD_BIT_PIO))
		return -EOPNOTSUPP;

	if (args->flags & BIT(MSHV_IOEVENTFD_BIT_DEASSIGN))
		return mshv_deassign_ioeventfd(pt, args);

	return mshv_assign_ioeventfd(pt, args);
}

void mshv_eventfd_init(struct mshv_partition *pt)
{
	spin_lock_init(&pt->pt_irqfds_lock);
	INIT_HLIST_HEAD(&pt->pt_irqfds_list);

	INIT_HLIST_HEAD(&pt->irqfds_resampler_list);
	mutex_init(&pt->irqfds_resampler_lock);

	INIT_HLIST_HEAD(&pt->ioeventfds_list);
}

void mshv_eventfd_release(struct mshv_partition *pt)
{
	struct hlist_head items;
	struct hlist_node *n;
	struct mshv_ioeventfd *p;

	hlist_move_list(&pt->ioeventfds_list, &items);
	synchronize_rcu();

	hlist_for_each_entry_safe(p, n, &items, iovntfd_hnode) {
		hlist_del(&p->iovntfd_hnode);
		ioeventfd_release(p, pt->pt_id);
	}

	mshv_irqfd_release(pt);
}