// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/eventfd.h>
#include <linux/eventpoll.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io-wq.h"
#include "eventfd.h"

struct io_ev_fd {
	struct eventfd_ctx	*cq_ev_fd;
	unsigned int		eventfd_async: 1;
	struct rcu_head		rcu;
	refcount_t		refs;
	atomic_t		ops;
};

enum {
	IO_EVENTFD_OP_SIGNAL_BIT,
};

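/*
 * Drop the reference to the underlying eventfd context and free the
 * io_ev_fd. Called once the last reference has been put, either directly
 * or from an RCU callback.
 */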
static void io_eventfd_free(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_ctx_put(ev_fd->cq_ev_fd);
	kfree(ev_fd);
}

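/*
 * Deferred signalling from RCU callback context, used when the eventfd
 * could not be signalled directly. Drops the reference taken by
 * io_eventfd_signal().
 */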
static void io_eventfd_do_signal(struct rcu_head *rcu)
{
	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);

	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);

	if (refcount_dec_and_test(&ev_fd->refs))
		io_eventfd_free(rcu);
}

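/*
 * Signal the registered eventfd, if any, to notify waiters of CQ ring
 * activity. If signalling is not currently allowed, defer it to RCU
 * callback context via io_eventfd_do_signal().
 */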
void io_eventfd_signal(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd = NULL;

	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
		return;

	guard(rcu)();

	/*
	 * rcu_dereference ctx->io_ev_fd once and use it both for the check
	 * and for eventfd_signal.
	 */
	ev_fd = rcu_dereference(ctx->io_ev_fd);

	/*
	 * Check that ev_fd exists: an io_eventfd_unregister() call may have
	 * cleared ctx->io_ev_fd concurrently.
	 */
	if (unlikely(!ev_fd))
		return;
	if (!refcount_inc_not_zero(&ev_fd->refs))
		return;
	if (ev_fd->eventfd_async && !io_wq_current_is_worker())
		goto out;

	if (likely(eventfd_signal_allowed())) {
		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
	} else {
		if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
			call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
			return;
		}
	}
out:
	if (refcount_dec_and_test(&ev_fd->refs))
		call_rcu(&ev_fd->rcu, io_eventfd_free);
}

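/*
 * Signal the eventfd only if new CQEs have been posted since the last
 * notification, so the eventfd count never changes without a matching CQ
 * ring update.
 */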
void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
{
	bool skip;

	spin_lock(&ctx->completion_lock);

	/*
	 * Eventfd should only get triggered when at least one event has been
	 * posted. Some applications rely on the eventfd notification count
	 * only changing IFF a new CQE has been added to the CQ ring. There
	 * is no dependency on a 1:1 relationship between how many times this
	 * function is called (and hence the eventfd count) and the number of
	 * CQEs posted to the CQ ring.
	 */
	skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);
	if (skip)
		return;

	io_eventfd_signal(ctx);
}

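/*
 * Register an eventfd for CQE notifications. The file descriptor is read
 * from userspace; eventfd_async restricts notifications to completions
 * that happen from io-wq worker context.
 */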
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
			unsigned int eventfd_async)
{
	struct io_ev_fd *ev_fd;
	__s32 __user *fds = arg;
	int fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd)
		return -EBUSY;

	if (copy_from_user(&fd, fds, sizeof(*fds)))
		return -EFAULT;

	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
	if (!ev_fd)
		return -ENOMEM;

	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
	if (IS_ERR(ev_fd->cq_ev_fd)) {
		int ret = PTR_ERR(ev_fd->cq_ev_fd);

		kfree(ev_fd);
		return ret;
	}

	spin_lock(&ctx->completion_lock);
	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
	spin_unlock(&ctx->completion_lock);

	ev_fd->eventfd_async = eventfd_async;
	ctx->has_evfd = true;
	refcount_set(&ev_fd->refs, 1);
	atomic_set(&ev_fd->ops, 0);
	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
	return 0;
}

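/*
 * Unregister the currently registered eventfd, if any, and drop the
 * registration reference. The io_ev_fd is freed via RCU once all
 * references are gone.
 */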
int io_eventfd_unregister(struct io_ring_ctx *ctx)
{
	struct io_ev_fd *ev_fd;

	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
					lockdep_is_held(&ctx->uring_lock));
	if (ev_fd) {
		ctx->has_evfd = false;
		rcu_assign_pointer(ctx->io_ev_fd, NULL);
		if (refcount_dec_and_test(&ev_fd->refs))
			call_rcu(&ev_fd->rcu, io_eventfd_free);
		return 0;
	}

	return -ENXIO;
}