1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/kernel.h> 3 #include <linux/errno.h> 4 #include <linux/mm.h> 5 #include <linux/slab.h> 6 #include <linux/eventfd.h> 7 #include <linux/eventpoll.h> 8 #include <linux/io_uring.h> 9 #include <linux/io_uring_types.h> 10 11 #include "io-wq.h" 12 #include "eventfd.h" 13 14 struct io_ev_fd { 15 struct eventfd_ctx *cq_ev_fd; 16 unsigned int eventfd_async; 17 /* protected by ->completion_lock */ 18 unsigned last_cq_tail; 19 refcount_t refs; 20 atomic_t ops; 21 struct rcu_head rcu; 22 }; 23 24 enum { 25 IO_EVENTFD_OP_SIGNAL_BIT, 26 }; 27 28 static void io_eventfd_free(struct rcu_head *rcu) 29 { 30 struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); 31 32 eventfd_ctx_put(ev_fd->cq_ev_fd); 33 kfree(ev_fd); 34 } 35 36 static void io_eventfd_put(struct io_ev_fd *ev_fd) 37 { 38 if (refcount_dec_and_test(&ev_fd->refs)) 39 call_rcu(&ev_fd->rcu, io_eventfd_free); 40 } 41 42 static void io_eventfd_do_signal(struct rcu_head *rcu) 43 { 44 struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); 45 46 atomic_andnot(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops); 47 eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE); 48 io_eventfd_put(ev_fd); 49 } 50 51 /* 52 * Returns true if the caller should put the ev_fd reference, false if not. 53 */ 54 static bool __io_eventfd_signal(struct io_ev_fd *ev_fd) 55 { 56 if (eventfd_signal_allowed()) { 57 eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE); 58 return true; 59 } 60 if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) { 61 call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal); 62 return false; 63 } 64 return true; 65 } 66 67 /* 68 * Trigger if eventfd_async isn't set, or if it's set and the caller is 69 * an async worker. 70 */ 71 static bool io_eventfd_trigger(struct io_ev_fd *ev_fd) 72 { 73 return !ev_fd->eventfd_async || io_wq_current_is_worker(); 74 } 75 76 void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event) 77 { 78 bool skip = false; 79 struct io_ev_fd *ev_fd; 80 struct io_rings *rings; 81 82 guard(rcu)(); 83 84 rings = rcu_dereference(ctx->rings_rcu); 85 if (!rings) 86 return; 87 if (READ_ONCE(rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) 88 return; 89 ev_fd = rcu_dereference(ctx->io_ev_fd); 90 /* 91 * Check again if ev_fd exists in case an io_eventfd_unregister call 92 * completed between the NULL check of ctx->io_ev_fd at the start of 93 * the function and rcu_read_lock. 94 */ 95 if (!ev_fd) 96 return; 97 if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs)) 98 return; 99 100 if (cqe_event) { 101 /* 102 * Eventfd should only get triggered when at least one event 103 * has been posted. Some applications rely on the eventfd 104 * notification count only changing IFF a new CQE has been 105 * added to the CQ ring. There's no dependency on 1:1 106 * relationship between how many times this function is called 107 * (and hence the eventfd count) and number of CQEs posted to 108 * the CQ ring. 109 */ 110 spin_lock(&ctx->completion_lock); 111 skip = ctx->cached_cq_tail == ev_fd->last_cq_tail; 112 ev_fd->last_cq_tail = ctx->cached_cq_tail; 113 spin_unlock(&ctx->completion_lock); 114 } 115 116 if (skip || __io_eventfd_signal(ev_fd)) 117 io_eventfd_put(ev_fd); 118 } 119 120 int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg, 121 unsigned int eventfd_async) 122 { 123 struct io_ev_fd *ev_fd; 124 __s32 __user *fds = arg; 125 int fd; 126 127 ev_fd = rcu_dereference_protected(ctx->io_ev_fd, 128 lockdep_is_held(&ctx->uring_lock)); 129 if (ev_fd) 130 return -EBUSY; 131 132 if (copy_from_user(&fd, fds, sizeof(*fds))) 133 return -EFAULT; 134 135 ev_fd = kmalloc_obj(*ev_fd); 136 if (!ev_fd) 137 return -ENOMEM; 138 139 ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd); 140 if (IS_ERR(ev_fd->cq_ev_fd)) { 141 int ret = PTR_ERR(ev_fd->cq_ev_fd); 142 143 kfree(ev_fd); 144 return ret; 145 } 146 147 spin_lock(&ctx->completion_lock); 148 ev_fd->last_cq_tail = ctx->cached_cq_tail; 149 spin_unlock(&ctx->completion_lock); 150 151 ev_fd->eventfd_async = eventfd_async; 152 ctx->int_flags |= IO_RING_F_HAS_EVFD; 153 refcount_set(&ev_fd->refs, 1); 154 atomic_set(&ev_fd->ops, 0); 155 rcu_assign_pointer(ctx->io_ev_fd, ev_fd); 156 return 0; 157 } 158 159 int io_eventfd_unregister(struct io_ring_ctx *ctx) 160 { 161 struct io_ev_fd *ev_fd; 162 163 ev_fd = rcu_dereference_protected(ctx->io_ev_fd, 164 lockdep_is_held(&ctx->uring_lock)); 165 if (ev_fd) { 166 ctx->int_flags &= ~IO_RING_F_HAS_EVFD; 167 rcu_assign_pointer(ctx->io_ev_fd, NULL); 168 io_eventfd_put(ev_fd); 169 return 0; 170 } 171 172 return -ENXIO; 173 } 174