1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/mm.h>
5 #include <linux/slab.h>
6 #include <linux/eventfd.h>
7 #include <linux/eventpoll.h>
8 #include <linux/io_uring.h>
9 #include <linux/io_uring_types.h>
10
11 #include "io-wq.h"
12 #include "eventfd.h"
13
/*
 * State for an eventfd registered against a ring. Published via RCU through
 * ctx->io_ev_fd; freed by io_eventfd_free() after the last reference is
 * dropped (see io_eventfd_put()).
 */
struct io_ev_fd {
	/* eventfd context signaled when CQEs are posted */
	struct eventfd_ctx *cq_ev_fd;
	/* nonzero: only signal from an io-wq worker (see io_eventfd_trigger()) */
	unsigned int eventfd_async;
	/* protected by ->completion_lock */
	unsigned last_cq_tail;
	refcount_t refs;
	/* IO_EVENTFD_OP_* bits; coalesces deferred signal requests */
	atomic_t ops;
	/* used both for deferred signaling and for the final free */
	struct rcu_head rcu;
};
23
/* Bit numbers for io_ev_fd->ops. */
enum {
	/* set while an RCU-deferred eventfd signal is pending */
	IO_EVENTFD_OP_SIGNAL_BIT,
};
27
io_eventfd_free(struct rcu_head * rcu)28 static void io_eventfd_free(struct rcu_head *rcu)
29 {
30 struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
31
32 eventfd_ctx_put(ev_fd->cq_ev_fd);
33 kfree(ev_fd);
34 }
35
io_eventfd_put(struct io_ev_fd * ev_fd)36 static void io_eventfd_put(struct io_ev_fd *ev_fd)
37 {
38 if (refcount_dec_and_test(&ev_fd->refs))
39 call_rcu(&ev_fd->rcu, io_eventfd_free);
40 }
41
io_eventfd_do_signal(struct rcu_head * rcu)42 static void io_eventfd_do_signal(struct rcu_head *rcu)
43 {
44 struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
45
46 eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
47 io_eventfd_put(ev_fd);
48 }
49
50 /*
51 * Returns true if the caller should put the ev_fd reference, false if not.
52 */
__io_eventfd_signal(struct io_ev_fd * ev_fd)53 static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
54 {
55 if (eventfd_signal_allowed()) {
56 eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
57 return true;
58 }
59 if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
60 call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
61 return false;
62 }
63 return true;
64 }
65
66 /*
67 * Trigger if eventfd_async isn't set, or if it's set and the caller is
68 * an async worker.
69 */
io_eventfd_trigger(struct io_ev_fd * ev_fd)70 static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
71 {
72 return !ev_fd->eventfd_async || io_wq_current_is_worker();
73 }
74
/*
 * Signal the eventfd registered on @ctx, if one exists and signaling is
 * enabled. @cqe_event is true when the caller has just posted a new CQE;
 * in that case the notification is suppressed unless the CQ tail has
 * advanced since the last signal, so the eventfd count only moves when new
 * completions are actually visible.
 */
void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
{
	bool skip = false;
	struct io_ev_fd *ev_fd;
	struct io_rings *rings;

	/* Hold the RCU read lock for the rest of the function */
	guard(rcu)();

	rings = rcu_dereference(ctx->rings_rcu);
	if (!rings)
		return;
	/* userspace can ask for notifications to be suppressed */
	if (READ_ONCE(rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return;
	ev_fd = rcu_dereference(ctx->io_ev_fd);
	/*
	 * A concurrent io_eventfd_unregister() may already have cleared
	 * ctx->io_ev_fd. RCU only guarantees that an ev_fd we do observe
	 * stays valid until the read lock is dropped.
	 */
	if (!ev_fd)
		return;
	/*
	 * Bail if the trigger conditions aren't met, or if the ev_fd is
	 * already on its way out (refs dropped to zero).
	 */
	if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs))
		return;

	if (cqe_event) {
		/*
		 * Eventfd should only get triggered when at least one event
		 * has been posted. Some applications rely on the eventfd
		 * notification count only changing IFF a new CQE has been
		 * added to the CQ ring. There's no dependency on 1:1
		 * relationship between how many times this function is called
		 * (and hence the eventfd count) and number of CQEs posted to
		 * the CQ ring.
		 */
		spin_lock(&ctx->completion_lock);
		skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
		ev_fd->last_cq_tail = ctx->cached_cq_tail;
		spin_unlock(&ctx->completion_lock);
	}

	/* __io_eventfd_signal() returns false when it kept the reference */
	if (skip || __io_eventfd_signal(ev_fd))
		io_eventfd_put(ev_fd);
}
118
/*
 * Register an eventfd for CQE notifications on @ctx. @arg is a user pointer
 * to an __s32 holding the eventfd file descriptor. A nonzero @eventfd_async
 * restricts signaling to io-wq worker context (see io_eventfd_trigger()).
 * Caller must hold ctx->uring_lock (enforced via lockdep below).
 *
 * Returns 0 on success, -EBUSY if an eventfd is already registered, -EFAULT
 * on a bad user pointer, -ENOMEM on allocation failure, or the error from
 * eventfd_ctx_fdget() for an invalid descriptor.
 */
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	/* uring_lock serializes against concurrent register/unregister */
	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc_obj(*ev_fd);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	/* ->last_cq_tail is protected by ->completion_lock */
	spin_lock(&ctx->completion_lock);
	ev_fd->last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	/* publish only after every field above is initialized */
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}
157
io_eventfd_unregister(struct io_ring_ctx * ctx)158 int io_eventfd_unregister(struct io_ring_ctx *ctx)
159 {
160 struct io_ev_fd *ev_fd;
161
162 ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
163 lockdep_is_held(&ctx->uring_lock));
164 if (ev_fd) {
165 ctx->has_evfd = false;
166 rcu_assign_pointer(ctx->io_ev_fd, NULL);
167 io_eventfd_put(ev_fd);
168 return 0;
169 }
170
171 return -ENXIO;
172 }
173