xref: /linux/io_uring/eventfd.c (revision 3fd6c59042dbba50391e30862beac979491145fe)
// SPDX-License-Identifier: GPL-2.0
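/*
 * Eventfd notification support for io_uring: userspace can register an
 * eventfd with a ring so that waiters are woken through it when completions
 * are posted to the CQ ring. Registration and teardown come through the
 * io_uring_register(2) path (the IORING_REGISTER_EVENTFD{,_ASYNC} and
 * IORING_UNREGISTER_EVENTFD opcodes); the signaling helpers below are called
 * from the CQE posting paths.
 */
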
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/eventfd.h>
#include <linux/eventpoll.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io-wq.h"
#include "eventfd.h"

struct io_ev_fd {
	struct eventfd_ctx	*cq_ev_fd;
	unsigned int		eventfd_async;
	/* protected by ->completion_lock */
	unsigned		last_cq_tail;
	refcount_t		refs;
	atomic_t		ops;
	struct rcu_head		rcu;
};

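/*
 * Bits for io_ev_fd->ops. IO_EVENTFD_OP_SIGNAL_BIT is set by
 * __io_eventfd_signal() when it defers the signal to an RCU callback, so
 * that only the first caller queues io_eventfd_do_signal().
 */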
enum {
	IO_EVENTFD_OP_SIGNAL_BIT,
};

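/*
 * Final teardown: drop the eventfd context reference and free the io_ev_fd.
 */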
static void io_eventfd_free(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_ctx_put(ev_fd->cq_ev_fd);
	kfree(ev_fd);
}

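/*
 * RCU callback used when __io_eventfd_signal() cannot signal from the
 * current context: deliver the eventfd signal and drop the reference that
 * was handed over to the callback.
 */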
static void io_eventfd_do_signal(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);

	if (refcount_dec_and_test(&ev_fd->refs))
		io_eventfd_free(rcu);
}

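/*
 * Drop a reference; the final put frees the structure after an RCU grace
 * period, since io_eventfd_grab() looks it up under rcu_read_lock().
 */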
static void io_eventfd_put(struct io_ev_fd *ev_fd)
{
	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}

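/*
 * Pairs with io_eventfd_grab(): drop the reference taken there unless the
 * signaling path handed it off, then leave the RCU read-side section.
 */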
static void io_eventfd_release(struct io_ev_fd *ev_fd, bool put_ref)
{
	if (put_ref)
		io_eventfd_put(ev_fd);
	rcu_read_unlock();
}

/*
 * Returns true if the caller should put the ev_fd reference, false if not.
 */
static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
{
	if (eventfd_signal_allowed()) {
		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
		return true;
	}
	if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
		call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
		return false;
	}
	return true;
}

/*
 * Trigger if eventfd_async isn't set, or if it's set and the caller is
 * an async worker. If ev_fd isn't valid, obviously return false.
 */
static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
{
	if (ev_fd)
		return !ev_fd->eventfd_async || io_wq_current_is_worker();
	return false;
}

/*
 * On success, returns with an ev_fd reference grabbed and the RCU read
 * lock held.
 */
static struct io_ev_fd *io_eventfd_grab(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return NULL;

	rcu_read_lock();

	/*
	 * rcu_dereference ctx->io_ev_fd once and use it both for the checks
	 * below and for eventfd_signal.
	 */
	ev_fd = rcu_dereference(ctx->io_ev_fd);

	/*
	 * Check that ev_fd still exists and should trigger: an
	 * io_eventfd_unregister() call may have cleared ctx->io_ev_fd
	 * between the cq_flags check above and rcu_read_lock().
	 */
	if (io_eventfd_trigger(ev_fd) && refcount_inc_not_zero(&ev_fd->refs))
		return ev_fd;

	rcu_read_unlock();
	return NULL;
}

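/*
 * Signal the registered eventfd, if any, to tell waiters that completions
 * may be available on the CQ ring.
 */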
void io_eventfd_signal(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = io_eventfd_grab(ctx);
	if (ev_fd)
		io_eventfd_release(ev_fd, __io_eventfd_signal(ev_fd));
}

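/*
 * Like io_eventfd_signal(), but only signal if the CQ tail has moved since
 * the last signal, so the eventfd count only changes when new CQEs have
 * actually been posted.
 */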
void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = io_eventfd_grab(ctx);
	if (ev_fd) {
		bool skip, put_ref = true;

		/*
		 * The eventfd should only get triggered when at least one
		 * event has been posted. Some applications rely on the
		 * eventfd notification count changing if and only if a new
		 * CQE has been added to the CQ ring, but they do not depend
		 * on a 1:1 relationship between how many times this function
		 * is called (and hence the eventfd count) and the number of
		 * CQEs posted to the CQ ring.
		 */
		spin_lock(&ctx->completion_lock);
		skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
		ev_fd->last_cq_tail = ctx->cached_cq_tail;
		spin_unlock(&ctx->completion_lock);

		if (!skip)
			put_ref = __io_eventfd_signal(ev_fd);

		io_eventfd_release(ev_fd, put_ref);
	}
}

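/*
 * Register an eventfd for CQ ring notifications. Called with ctx->uring_lock
 * held (see the lockdep annotation below); @arg points at the eventfd file
 * descriptor and @eventfd_async selects the IORING_REGISTER_EVENTFD_ASYNC
 * behaviour checked in io_eventfd_trigger().
 *
 * For illustration only, a rough userspace sketch of the intended usage via
 * liburing (not part of this file) might be:
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *	uint64_t cnt;
 *
 *	io_uring_register_eventfd(&ring, efd);
 *	...
 *	read(efd, &cnt, sizeof(cnt));
 *
 * where the read() completes once new CQEs have been posted to the ring, and
 * io_uring_unregister_eventfd() removes the registration again.
 */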
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	spin_lock(&ctx->completion_lock);
	ev_fd->last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}

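/*
 * Remove a registered eventfd, also under ctx->uring_lock. The final free is
 * deferred through io_eventfd_put(), since signalers may still hold
 * references.
 */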
int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd) {
		ctx->has_evfd = false;
		rcu_assign_pointer(ctx->io_ev_fd, NULL);
		io_eventfd_put(ev_fd);
		return 0;
	}

	return -ENXIO;
}