xref: /linux/io_uring/eventfd.c (revision 566ab427f827b0256d3e8ce0235d088e6a9c28bd)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/mm.h>
5 #include <linux/slab.h>
6 #include <linux/eventfd.h>
7 #include <linux/eventpoll.h>
8 #include <linux/io_uring.h>
9 #include <linux/io_uring_types.h>
10 
11 #include "io-wq.h"
12 #include "eventfd.h"
13 
/*
 * State for the eventfd registered on a ring. The object is published
 * through ctx->io_ev_fd with RCU and is reference counted; it is freed from
 * an RCU callback once the last reference has been dropped.
 */
struct io_ev_fd {
	/* eventfd context obtained with eventfd_ctx_fdget() at registration */
	struct eventfd_ctx	*cq_ev_fd;
	/* when set, io_eventfd_signal() only signals from an io-wq worker */
	unsigned int		eventfd_async: 1;
	/* shared by the deferred-signal callback and the deferred free */
	struct rcu_head		rcu;
	/* starts at 1 for the registration; signallers take a temporary ref */
	refcount_t		refs;
	/* bitmask built from the IO_EVENTFD_OP_*_BIT positions */
	atomic_t		ops;
};
21 
/* Bit positions used in io_ev_fd->ops. */
enum {
	/* set once a deferred signal has been queued through call_rcu_hurry() */
	IO_EVENTFD_OP_SIGNAL_BIT = 0,
};
25 
26 static void io_eventfd_free(struct rcu_head *rcu)
27 {
28 	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
29 
30 	eventfd_ctx_put(ev_fd->cq_ev_fd);
31 	kfree(ev_fd);
32 }
33 
34 static void io_eventfd_do_signal(struct rcu_head *rcu)
35 {
36 	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
37 
38 	eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
39 
40 	if (refcount_dec_and_test(&ev_fd->refs))
41 		io_eventfd_free(rcu);
42 }
43 
44 void io_eventfd_signal(struct io_ring_ctx *ctx)
45 {
46 	struct io_ev_fd *ev_fd = NULL;
47 
48 	if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
49 		return;
50 
51 	guard(rcu)();
52 
53 	/*
54 	 * rcu_dereference ctx->io_ev_fd once and use it for both for checking
55 	 * and eventfd_signal
56 	 */
57 	ev_fd = rcu_dereference(ctx->io_ev_fd);
58 
59 	/*
60 	 * Check again if ev_fd exists incase an io_eventfd_unregister call
61 	 * completed between the NULL check of ctx->io_ev_fd at the start of
62 	 * the function and rcu_read_lock.
63 	 */
64 	if (unlikely(!ev_fd))
65 		return;
66 	if (!refcount_inc_not_zero(&ev_fd->refs))
67 		return;
68 	if (ev_fd->eventfd_async && !io_wq_current_is_worker())
69 		goto out;
70 
71 	if (likely(eventfd_signal_allowed())) {
72 		eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
73 	} else {
74 		if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
75 			call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
76 			return;
77 		}
78 	}
79 out:
80 	if (refcount_dec_and_test(&ev_fd->refs))
81 		call_rcu(&ev_fd->rcu, io_eventfd_free);
82 }
83 
84 void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
85 {
86 	bool skip;
87 
88 	spin_lock(&ctx->completion_lock);
89 
90 	/*
91 	 * Eventfd should only get triggered when at least one event has been
92 	 * posted. Some applications rely on the eventfd notification count
93 	 * only changing IFF a new CQE has been added to the CQ ring. There's
94 	 * no depedency on 1:1 relationship between how many times this
95 	 * function is called (and hence the eventfd count) and number of CQEs
96 	 * posted to the CQ ring.
97 	 */
98 	skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail;
99 	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
100 	spin_unlock(&ctx->completion_lock);
101 	if (skip)
102 		return;
103 
104 	io_eventfd_signal(ctx);
105 }
106 
107 int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
108 			unsigned int eventfd_async)
109 {
110 	struct io_ev_fd *ev_fd;
111 	__s32 __user *fds = arg;
112 	int fd;
113 
114 	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
115 					lockdep_is_held(&ctx->uring_lock));
116 	if (ev_fd)
117 		return -EBUSY;
118 
119 	if (copy_from_user(&fd, fds, sizeof(*fds)))
120 		return -EFAULT;
121 
122 	ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL);
123 	if (!ev_fd)
124 		return -ENOMEM;
125 
126 	ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd);
127 	if (IS_ERR(ev_fd->cq_ev_fd)) {
128 		int ret = PTR_ERR(ev_fd->cq_ev_fd);
129 
130 		kfree(ev_fd);
131 		return ret;
132 	}
133 
134 	spin_lock(&ctx->completion_lock);
135 	ctx->evfd_last_cq_tail = ctx->cached_cq_tail;
136 	spin_unlock(&ctx->completion_lock);
137 
138 	ev_fd->eventfd_async = eventfd_async;
139 	ctx->has_evfd = true;
140 	refcount_set(&ev_fd->refs, 1);
141 	atomic_set(&ev_fd->ops, 0);
142 	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
143 	return 0;
144 }
145 
146 int io_eventfd_unregister(struct io_ring_ctx *ctx)
147 {
148 	struct io_ev_fd *ev_fd;
149 
150 	ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
151 					lockdep_is_held(&ctx->uring_lock));
152 	if (ev_fd) {
153 		ctx->has_evfd = false;
154 		rcu_assign_pointer(ctx->io_ev_fd, NULL);
155 		if (refcount_dec_and_test(&ev_fd->refs))
156 			call_rcu(&ev_fd->rcu, io_eventfd_free);
157 		return 0;
158 	}
159 
160 	return -ENXIO;
161 }
162