// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/eventfd.h>
#include <linux/eventpoll.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io-wq.h"
#include "eventfd.h"

struct io_ev_fd {
	struct eventfd_ctx	*cq_ev_fd;
	unsigned int		eventfd_async: 1;
	struct rcu_head		rcu;
	refcount_t		refs;
	atomic_t		ops;
};

enum {
	IO_EVENTFD_OP_SIGNAL_BIT,
};

static void io_eventfd_free(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_ctx_put(ev_fd->cq_ev_fd);
	kfree(ev_fd);
}

/*
 * RCU callback used to issue a deferred eventfd signal when signaling
 * wasn't allowed from the original context.
 */
static void io_eventfd_do_signal(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);

	if (refcount_dec_and_test(&ev_fd->refs))
		io_eventfd_free(rcu);
}

/*
 * Signal the eventfd registered against @ctx, if any, to tell a waiter
 * that completions may be available on the CQ ring.
 */
void io_eventfd_signal(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd = NULL;

	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return;

	guard(rcu)();

	/*
	 * rcu_dereference ctx->io_ev_fd once and use it both for the NULL
	 * check and for eventfd_signal
	 */
	ev_fd = rcu_dereference(ctx->io_ev_fd);

	/*
	 * Check again if ev_fd exists in case an io_eventfd_unregister call
	 * completed between the NULL check of ctx->io_ev_fd at the start of
	 * the function and rcu_read_lock.
	 */
	if (unlikely(!ev_fd))
		return;
	if (!refcount_inc_not_zero(&ev_fd->refs))
		return;
	if (ev_fd->eventfd_async && !io_wq_current_is_worker())
		goto out;

	if (likely(eventfd_signal_allowed())) {
		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
	} else {
		if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
			call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
			return;
		}
	}
out:
	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}

void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
{
	bool skip;

	spin_lock(&ctx->completion_lock);

	/*
	 * Eventfd should only get triggered when at least one event has been
	 * posted. Some applications rely on the eventfd notification count
	 * only changing IFF a new CQE has been added to the CQ ring. There's
	 * no dependency on a 1:1 relationship between how many times this
	 * function is called (and hence the eventfd count) and the number of
	 * CQEs posted to the CQ ring.
	 */
	skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);
	if (skip)
		return;

	io_eventfd_signal(ctx);
}

int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	spin_lock(&ctx->completion_lock);
	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}

int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd) {
		ctx->has_evfd = false;
		rcu_assign_pointer(ctx->io_ev_fd, NULL);
		if (refcount_dec_and_test(&ev_fd->refs))
			call_rcu(&ev_fd->rcu, io_eventfd_free);
		return 0;
	}

	return -ENXIO;
}
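
/*
 * The sketch below is illustrative only and not part of this file or of the
 * kernel build (hence the #if 0 guard): a minimal userspace consumer of the
 * registration path above, written against liburing. It assumes liburing's
 * io_uring_register_eventfd()/io_uring_unregister_eventfd() helpers, which
 * drive io_eventfd_register()/io_eventfd_unregister() through
 * IORING_REGISTER_EVENTFD, and it elides most error handling.
 */
#if 0
#include <liburing.h>
#include <sys/eventfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_cqe *cqe;
	uint64_t count;
	int efd;

	if (io_uring_queue_init(8, &ring, 0))
		return 1;

	/* Registering the eventfd hooks up ctx->io_ev_fd in the kernel. */
	efd = eventfd(0, EFD_CLOEXEC);
	if (efd < 0 || io_uring_register_eventfd(&ring, efd))
		return 1;

	/* Post one CQE; a NOP is enough to trigger an eventfd notification. */
	io_uring_prep_nop(io_uring_get_sqe(&ring));
	io_uring_submit(&ring);

	/* Blocks until the kernel signals the eventfd for the new CQE. */
	if (read(efd, &count, sizeof(count)) == sizeof(count))
		printf("eventfd count: %llu\n", (unsigned long long)count);

	if (!io_uring_peek_cqe(&ring, &cqe))
		io_uring_cqe_seen(&ring, cqe);

	io_uring_unregister_eventfd(&ring);
	close(efd);
	io_uring_queue_exit(&ring);
	return 0;
}
#endif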