// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/eventfd.h>
#include <linux/eventpoll.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io-wq.h"
#include "eventfd.h"

struct io_ev_fd {
	struct eventfd_ctx	*cq_ev_fd;
	unsigned int		eventfd_async;
	/* protected by ->completion_lock */
	unsigned		last_cq_tail;
	refcount_t		refs;
	atomic_t		ops;
	struct rcu_head		rcu;
};

enum {
	IO_EVENTFD_OP_SIGNAL_BIT,
};

/* RCU callback: drop the eventfd context reference and free the io_ev_fd */
static void io_eventfd_free(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_ctx_put(ev_fd->cq_ev_fd);
	kfree(ev_fd);
}

/*
 * Deferred signal path: run from an RCU callback when __io_eventfd_signal()
 * could not signal the eventfd from its original context. Drops the
 * reference handed over by the signaling side.
 */
static void io_eventfd_do_signal(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);

	if (refcount_dec_and_test(&ev_fd->refs))
		io_eventfd_free(rcu);
}

/* Drop a reference; the final put frees the io_ev_fd after a grace period */
static void io_eventfd_put(struct io_ev_fd *ev_fd)
{
	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}

/*
 * Undo io_eventfd_grab(): drop the RCU read lock and, if @put_ref is true,
 * the reference that was grabbed.
 */
static void io_eventfd_release(struct io_ev_fd *ev_fd, bool put_ref)
{
	if (put_ref)
		io_eventfd_put(ev_fd);
	rcu_read_unlock();
}

/*
 * Returns true if the caller should put the ev_fd reference, false if not.
 */
static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
{
	if (eventfd_signal_allowed()) {
		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
		return true;
	}
	if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
		call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
		return false;
	}
	return true;
}

/*
 * Trigger if eventfd_async isn't set, or if it's set and the caller is
 * an async worker. If ev_fd isn't valid, obviously return false.
 */
static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
{
	if (ev_fd)
		return !ev_fd->eventfd_async || io_wq_current_is_worker();
	return false;
}

/*
 * On success, returns with an ev_fd reference grabbed and the RCU read
 * lock held.
 */
static struct io_ev_fd *io_eventfd_grab(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return NULL;

	rcu_read_lock();

	/*
	 * rcu_dereference ctx->io_ev_fd once and use it both for the
	 * trigger check and for eventfd_signal.
	 */
	ev_fd = rcu_dereference(ctx->io_ev_fd);

	/*
	 * Check if ev_fd exists in case an io_eventfd_unregister call
	 * completed between the cq_flags check at the start of the
	 * function and rcu_read_lock().
	 */
	if (io_eventfd_trigger(ev_fd) && refcount_inc_not_zero(&ev_fd->refs))
		return ev_fd;

	rcu_read_unlock();
	return NULL;
}

/* Signal the eventfd registered on @ctx, if one exists and may trigger */
void io_eventfd_signal(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = io_eventfd_grab(ctx);
	if (ev_fd)
		io_eventfd_release(ev_fd, __io_eventfd_signal(ev_fd));
}

/* Like io_eventfd_signal(), but only signals if new CQEs have been posted */
void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = io_eventfd_grab(ctx);
	if (ev_fd) {
		bool skip, put_ref = true;

		/*
		 * Eventfd should only get triggered when at least one event
		 * has been posted. Some applications rely on the eventfd
		 * notification count only changing IFF a new CQE has been
		 * added to the CQ ring. There's no dependency on a 1:1
		 * relationship between how many times this function is called
		 * (and hence the eventfd count) and the number of CQEs posted
		 * to the CQ ring.
		 */
		spin_lock(&ctx->completion_lock);
		skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
		ev_fd->last_cq_tail = ctx->cached_cq_tail;
		spin_unlock(&ctx->completion_lock);

		if (!skip)
			put_ref = __io_eventfd_signal(ev_fd);

		io_eventfd_release(ev_fd, put_ref);
	}
}

/* Caller must hold ctx->uring_lock */
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					  lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	spin_lock(&ctx->completion_lock);
	ev_fd->last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}

/* Caller must hold ctx->uring_lock */
int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					  lockdep_is_held(&ctx->uring_lock));
	if (ev_fd) {
		ctx->has_evfd = false;
		rcu_assign_pointer(ctx->io_ev_fd, NULL);
		io_eventfd_put(ev_fd);
		return 0;
	}

	return -ENXIO;
}
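
/*
 * Illustrative userspace sketch (not part of this file's build): the
 * register and unregister paths above are reached via io_uring_register(2)
 * with IORING_REGISTER_EVENTFD, IORING_REGISTER_EVENTFD_ASYNC and
 * IORING_UNREGISTER_EVENTFD. With liburing, assuming an already initialized
 * 'ring', this looks roughly like:
 *
 *	#include <sys/eventfd.h>
 *	#include <liburing.h>
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *
 *	// eventfd_async == 0: any eventfd trigger point may signal efd
 *	io_uring_register_eventfd(&ring, efd);
 *
 *	// ...or eventfd_async == 1: only io-wq worker context signals it
 *	// io_uring_register_eventfd_async(&ring, efd);
 *
 *	io_uring_unregister_eventfd(&ring);
 */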