xref: /freebsd/sys/kern/sys_eventfd.c (revision c203bd70b5957f85616424b6fa374479372d06e3)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2007 Roman Divacky
5  * Copyright (c) 2014 Dmitry Chagin
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/malloc.h>
37 #include <sys/limits.h>
38 #include <sys/lock.h>
39 #include <sys/mutex.h>
40 #include <sys/types.h>
41 #include <sys/user.h>
42 #include <sys/fcntl.h>
43 #include <sys/file.h>
44 #include <sys/filedesc.h>
45 #include <sys/filio.h>
46 #include <sys/stat.h>
47 #include <sys/errno.h>
48 #include <sys/event.h>
49 #include <sys/poll.h>
50 #include <sys/proc.h>
51 #include <sys/uio.h>
52 #include <sys/selinfo.h>
53 #include <sys/eventfd.h>
54 
55 #include <security/audit/audit.h>
56 
57 _Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC");
58 _Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK");
59 
60 MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures");
61 
62 static fo_rdwr_t	eventfd_read;
63 static fo_rdwr_t	eventfd_write;
64 static fo_ioctl_t	eventfd_ioctl;
65 static fo_poll_t	eventfd_poll;
66 static fo_kqfilter_t	eventfd_kqfilter;
67 static fo_stat_t	eventfd_stat;
68 static fo_close_t	eventfd_close;
69 static fo_fill_kinfo_t	eventfd_fill_kinfo;
70 
71 static struct fileops eventfdops = {
72 	.fo_read = eventfd_read,
73 	.fo_write = eventfd_write,
74 	.fo_truncate = invfo_truncate,
75 	.fo_ioctl = eventfd_ioctl,
76 	.fo_poll = eventfd_poll,
77 	.fo_kqfilter = eventfd_kqfilter,
78 	.fo_stat = eventfd_stat,
79 	.fo_close = eventfd_close,
80 	.fo_chmod = invfo_chmod,
81 	.fo_chown = invfo_chown,
82 	.fo_sendfile = invfo_sendfile,
83 	.fo_fill_kinfo = eventfd_fill_kinfo,
84 	.fo_flags = DFLAG_PASSABLE
85 };
86 
87 static void	filt_eventfddetach(struct knote *kn);
88 static int	filt_eventfdread(struct knote *kn, long hint);
89 static int	filt_eventfdwrite(struct knote *kn, long hint);
90 
91 static struct filterops eventfd_rfiltops = {
92 	.f_isfd = 1,
93 	.f_detach = filt_eventfddetach,
94 	.f_event = filt_eventfdread
95 };
96 
97 static struct filterops eventfd_wfiltops = {
98 	.f_isfd = 1,
99 	.f_detach = filt_eventfddetach,
100 	.f_event = filt_eventfdwrite
101 };
102 
103 struct eventfd {
104 	eventfd_t	efd_count;
105 	uint32_t	efd_flags;
106 	struct selinfo	efd_sel;
107 	struct mtx	efd_lock;
108 };
109 
110 int
111 eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
112     int flags)
113 {
114 	struct eventfd *efd;
115 	int fflags;
116 
117 	AUDIT_ARG_FFLAGS(flags);
118 	AUDIT_ARG_VALUE(initval);
119 
120 	efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO);
121 	efd->efd_flags = flags;
122 	efd->efd_count = initval;
123 	mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
124 	knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock);
125 
126 	fflags = FREAD | FWRITE;
127 	if ((flags & EFD_NONBLOCK) != 0)
128 		fflags |= FNONBLOCK;
129 	finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops);
130 
131 	return (0);
132 }
133 
134 static int
135 eventfd_close(struct file *fp, struct thread *td)
136 {
137 	struct eventfd *efd;
138 
139 	efd = fp->f_data;
140 	seldrain(&efd->efd_sel);
141 	knlist_destroy(&efd->efd_sel.si_note);
142 	mtx_destroy(&efd->efd_lock);
143 	free(efd, M_EVENTFD);
144 	return (0);
145 }
146 
147 static int
148 eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
149     int flags, struct thread *td)
150 {
151 	struct eventfd *efd;
152 	eventfd_t count;
153 	int error;
154 
155 	if (uio->uio_resid < sizeof(eventfd_t))
156 		return (EINVAL);
157 
158 	error = 0;
159 	efd = fp->f_data;
160 	mtx_lock(&efd->efd_lock);
161 	while (error == 0 && efd->efd_count == 0) {
162 		if ((fp->f_flag & FNONBLOCK) != 0) {
163 			mtx_unlock(&efd->efd_lock);
164 			return (EAGAIN);
165 		}
166 		error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH,
167 		    "efdrd", 0);
168 	}
169 	if (error == 0) {
170 		MPASS(efd->efd_count > 0);
171 		if ((efd->efd_flags & EFD_SEMAPHORE) != 0) {
172 			count = 1;
173 			--efd->efd_count;
174 		} else {
175 			count = efd->efd_count;
176 			efd->efd_count = 0;
177 		}
178 		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
179 		selwakeup(&efd->efd_sel);
180 		wakeup(&efd->efd_count);
181 		mtx_unlock(&efd->efd_lock);
182 		error = uiomove(&count, sizeof(eventfd_t), uio);
183 	} else
184 		mtx_unlock(&efd->efd_lock);
185 
186 	return (error);
187 }
188 
189 static int
190 eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
191     int flags, struct thread *td)
192 {
193 	struct eventfd *efd;
194 	eventfd_t count;
195 	int error;
196 
197 	if (uio->uio_resid < sizeof(eventfd_t))
198 		return (EINVAL);
199 
200 	error = uiomove(&count, sizeof(eventfd_t), uio);
201 	if (error != 0)
202 		return (error);
203 	if (count == UINT64_MAX)
204 		return (EINVAL);
205 
206 	efd = fp->f_data;
207 	mtx_lock(&efd->efd_lock);
208 retry:
209 	if (UINT64_MAX - efd->efd_count <= count) {
210 		if ((fp->f_flag & FNONBLOCK) != 0) {
211 			mtx_unlock(&efd->efd_lock);
212 			/* Do not not return the number of bytes written */
213 			uio->uio_resid += sizeof(eventfd_t);
214 			return (EAGAIN);
215 		}
216 		error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
217 		    PCATCH, "efdwr", 0);
218 		if (error == 0)
219 			goto retry;
220 	}
221 	if (error == 0) {
222 		MPASS(UINT64_MAX - efd->efd_count > count);
223 		efd->efd_count += count;
224 		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
225 		selwakeup(&efd->efd_sel);
226 		wakeup(&efd->efd_count);
227 	}
228 	mtx_unlock(&efd->efd_lock);
229 
230 	return (error);
231 }
232 
233 static int
234 eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
235     struct thread *td)
236 {
237 	struct eventfd *efd;
238 	int revents;
239 
240 	efd = fp->f_data;
241 	revents = 0;
242 	mtx_lock(&efd->efd_lock);
243 	if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0)
244 		revents |= events & (POLLIN | POLLRDNORM);
245 	if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 >
246 	    efd->efd_count)
247 		revents |= events & (POLLOUT | POLLWRNORM);
248 	if (revents == 0)
249 		selrecord(td, &efd->efd_sel);
250 	mtx_unlock(&efd->efd_lock);
251 
252 	return (revents);
253 }
254 
255 static int
256 eventfd_kqfilter(struct file *fp, struct knote *kn)
257 {
258 	struct eventfd *efd = fp->f_data;
259 
260 	mtx_lock(&efd->efd_lock);
261 	switch (kn->kn_filter) {
262 	case EVFILT_READ:
263 		kn->kn_fop = &eventfd_rfiltops;
264 		break;
265 	case EVFILT_WRITE:
266 		kn->kn_fop = &eventfd_wfiltops;
267 		break;
268 	default:
269 		mtx_unlock(&efd->efd_lock);
270 		return (EINVAL);
271 	}
272 
273 	kn->kn_hook = efd;
274 	knlist_add(&efd->efd_sel.si_note, kn, 1);
275 	mtx_unlock(&efd->efd_lock);
276 
277 	return (0);
278 }
279 
280 static void
281 filt_eventfddetach(struct knote *kn)
282 {
283 	struct eventfd *efd = kn->kn_hook;
284 
285 	mtx_lock(&efd->efd_lock);
286 	knlist_remove(&efd->efd_sel.si_note, kn, 1);
287 	mtx_unlock(&efd->efd_lock);
288 }
289 
290 static int
291 filt_eventfdread(struct knote *kn, long hint)
292 {
293 	struct eventfd *efd = kn->kn_hook;
294 	int ret;
295 
296 	mtx_assert(&efd->efd_lock, MA_OWNED);
297 	kn->kn_data = (int64_t)efd->efd_count;
298 	ret = efd->efd_count > 0;
299 
300 	return (ret);
301 }
302 
303 static int
304 filt_eventfdwrite(struct knote *kn, long hint)
305 {
306 	struct eventfd *efd = kn->kn_hook;
307 	int ret;
308 
309 	mtx_assert(&efd->efd_lock, MA_OWNED);
310 	kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count);
311 	ret = UINT64_MAX - 1 > efd->efd_count;
312 
313 	return (ret);
314 }
315 
316 static int
317 eventfd_ioctl(struct file *fp, u_long cmd, void *data,
318     struct ucred *active_cred, struct thread *td)
319 {
320 	switch (cmd) {
321 	case FIONBIO:
322 	case FIOASYNC:
323 		return (0);
324 	}
325 
326 	return (ENOTTY);
327 }
328 
329 static int
330 eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
331     struct thread *td)
332 {
333 	bzero((void *)st, sizeof *st);
334 	st->st_mode = S_IFIFO;
335 	return (0);
336 }
337 
338 static int
339 eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
340 {
341 	struct eventfd *efd = fp->f_data;
342 
343 	kif->kf_type = KF_TYPE_EVENTFD;
344 	mtx_lock(&efd->efd_lock);
345 	kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count;
346 	kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags;
347 	mtx_unlock(&efd->efd_lock);
348 	return (0);
349 }
350