xref: /linux/io_uring/eventfd.c (revision 8be01e1280912a84f6bcf963ceed6c9f13ba1986)
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/eventfd.h>
#include <linux/eventpoll.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io-wq.h"
#include "eventfd.h"

struct io_ev_fd {
	struct eventfd_ctx	*cq_ev_fd;
	/* if set, only signal from async (io-wq) context, see io_eventfd_trigger() */
	unsigned int		eventfd_async;
	/* protected by ->completion_lock */
	unsigned		last_cq_tail;
	refcount_t		refs;
	/* IO_EVENTFD_OP_* bits, tracks whether a deferred signal is pending */
	atomic_t		ops;
	struct rcu_head		rcu;
};

enum {
	IO_EVENTFD_OP_SIGNAL_BIT,
};

static void io_eventfd_free(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_ctx_put(ev_fd->cq_ev_fd);
	kfree(ev_fd);
}

static void io_eventfd_put(struct io_ev_fd *ev_fd)
{
	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}

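/*
 * Lifetime sketch (illustrative only, not part of this file): lookups run
 * under RCU and only use the object if they can still take a reference,
 * while the final put above defers the kfree() by a grace period. A
 * hypothetical reader therefore looks like:
 *
 *	rcu_read_lock();
 *	ev_fd = rcu_dereference(ctx->io_ev_fd);
 *	if (ev_fd && refcount_inc_not_zero(&ev_fd->refs))
 *		use(ev_fd);			// hypothetical consumer
 *	rcu_read_unlock();
 *	...
 *	io_eventfd_put(ev_fd);			// drop the reference
 *
 * This mirrors what io_eventfd_signal() below does via guard(rcu)().
 */
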
static void io_eventfd_do_signal(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	atomic_andnot(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops);
	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
	io_eventfd_put(ev_fd);
}

/*
 * Returns true if the caller should put the ev_fd reference, false if not.
 */
static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
{
	if (eventfd_signal_allowed()) {
		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
		return true;
	}
	if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
		call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
		return false;
	}
	return true;
}
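
/*
 * Illustrative sketch of the claim protocol above (hypothetical helper
 * names): atomic_fetch_or() returns the old value, so only the caller
 * that flips the SIGNAL bit 0 -> 1 schedules the deferred signal, and
 * later callers piggyback on it. io_eventfd_do_signal() clears the bit
 * with atomic_andnot() before signaling, so an event arriving after that
 * point schedules a fresh round rather than being lost:
 *
 *	if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
 *		schedule_signal(ev_fd);	// won the race, ref passed on
 *	else
 *		drop_ref(ev_fd);	// a signal is already queued
 */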

/*
 * Trigger if eventfd_async isn't set, or if it's set and the caller is
 * an async worker.
 */
static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
{
	return !ev_fd->eventfd_async || io_wq_current_is_worker();
}

void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
{
	bool skip = false;
	struct io_ev_fd *ev_fd;
	struct io_rings *rings;

	guard(rcu)();

	rings = rcu_dereference(ctx->rings_rcu);
	if (!rings)
		return;
	if (READ_ONCE(rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return;
	ev_fd = rcu_dereference(ctx->io_ev_fd);
	/*
	 * ev_fd may be NULL if an io_eventfd_unregister() call raced with
	 * this one and cleared ctx->io_ev_fd. The RCU read section entered
	 * above keeps a non-NULL ev_fd valid long enough for the reference
	 * count check below.
	 */
	if (!ev_fd)
		return;
	if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs))
		return;

	if (cqe_event) {
		/*
		 * The eventfd should only be signaled when at least one new
		 * event has been posted. Some applications rely on the
		 * eventfd notification count changing only when a new CQE
		 * has been added to the CQ ring. There need not be a 1:1
		 * relationship between how many times this function is
		 * called (and hence the eventfd count) and the number of
		 * CQEs posted to the CQ ring, however.
		 */
		spin_lock(&ctx->completion_lock);
		skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
		ev_fd->last_cq_tail = ctx->cached_cq_tail;
		spin_unlock(&ctx->completion_lock);
	}

	if (skip || __io_eventfd_signal(ev_fd))
		io_eventfd_put(ev_fd);
}

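/*
 * Worked example (illustrative): two CQEs move cached_cq_tail from 5 to 7
 * before the first io_eventfd_signal() call runs. That call sees
 * last_cq_tail == 5 != 7, records 7 and signals once. A second call issued
 * before any further CQE sees 7 == 7 and skips, so the eventfd count only
 * advances when the CQ ring tail does.
 */
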
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	spin_lock(&ctx->completion_lock);
	ev_fd->last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->int_flags |= IO_RING_F_HAS_EVFD;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}

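/*
 * Userspace sketch (illustrative; assumes liburing, error handling
 * elided). io_uring_register_eventfd() issues IORING_REGISTER_EVENTFD,
 * which lands in io_eventfd_register() above with eventfd_async == 0;
 * io_uring_register_eventfd_async() sets eventfd_async instead, so only
 * completions posted from async workers trigger the eventfd:
 *
 *	#include <sys/eventfd.h>
 *	#include <liburing.h>
 *
 *	struct io_uring ring;
 *	int efd;
 *
 *	io_uring_queue_init(8, &ring, 0);
 *	efd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 *	io_uring_register_eventfd(&ring, efd);
 */
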
int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd) {
		ctx->int_flags &= ~IO_RING_F_HAS_EVFD;
		rcu_assign_pointer(ctx->io_ev_fd, NULL);
		io_eventfd_put(ev_fd);
		return 0;
	}

	return -ENXIO;
}
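
/*
 * Consumer-side sketch (illustrative; hypothetical helper names, error
 * handling elided): the registered eventfd becomes readable when CQEs are
 * posted, so it can be multiplexed with poll/epoll. Note the counter read
 * back is a wakeup count, not a CQE count, so the loop must drain the CQ
 * ring regardless of its value:
 *
 *	uint64_t cnt;
 *	struct io_uring_cqe *cqe;
 *
 *	for (;;) {
 *		wait_readable(efd);		// e.g. via poll() or epoll
 *		read(efd, &cnt, sizeof(cnt));	// reset the counter
 *		while (io_uring_peek_cqe(&ring, &cqe) == 0) {
 *			handle_cqe(cqe);	// hypothetical
 *			io_uring_cqe_seen(&ring, cqe);
 *		}
 *	}
 */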