1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/systm.h>
29 #include <sys/event.h>
30 #include <sys/eventfd.h>
31 #include <sys/errno.h>
32 #include <sys/fcntl.h>
33 #include <sys/file.h>
34 #include <sys/filedesc.h>
35 #include <sys/filio.h>
36 #include <sys/kernel.h>
37 #include <sys/limits.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mutex.h>
41 #include <sys/poll.h>
42 #include <sys/proc.h>
43 #include <sys/selinfo.h>
44 #include <sys/stat.h>
45 #include <sys/uio.h>
46 #include <sys/user.h>
47
48 #include <security/audit/audit.h>
49
50 _Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC");
51 _Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK");
52
53 MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures");
54
55 static fo_rdwr_t eventfd_read;
56 static fo_rdwr_t eventfd_write;
57 static fo_ioctl_t eventfd_ioctl;
58 static fo_poll_t eventfd_poll;
59 static fo_kqfilter_t eventfd_kqfilter;
60 static fo_stat_t eventfd_stat;
61 static fo_close_t eventfd_close;
62 static fo_fill_kinfo_t eventfd_fill_kinfo;
63
64 static const struct fileops eventfdops = {
65 .fo_read = eventfd_read,
66 .fo_write = eventfd_write,
67 .fo_truncate = invfo_truncate,
68 .fo_ioctl = eventfd_ioctl,
69 .fo_poll = eventfd_poll,
70 .fo_kqfilter = eventfd_kqfilter,
71 .fo_stat = eventfd_stat,
72 .fo_close = eventfd_close,
73 .fo_chmod = invfo_chmod,
74 .fo_chown = invfo_chown,
75 .fo_sendfile = invfo_sendfile,
76 .fo_fill_kinfo = eventfd_fill_kinfo,
77 .fo_cmp = file_kcmp_generic,
78 .fo_flags = DFLAG_PASSABLE
79 };
80
/* Forward declarations of the kqueue filter callbacks used below. */
static int	filt_eventfdread(struct knote *kn, long hint);
static int	filt_eventfdwrite(struct knote *kn, long hint);
static void	filt_eventfddetach(struct knote *kn);
84
85 static const struct filterops eventfd_rfiltops = {
86 .f_isfd = 1,
87 .f_detach = filt_eventfddetach,
88 .f_event = filt_eventfdread
89 };
90
91 static const struct filterops eventfd_wfiltops = {
92 .f_isfd = 1,
93 .f_detach = filt_eventfddetach,
94 .f_event = filt_eventfdwrite
95 };
96
97 struct eventfd {
98 eventfd_t efd_count;
99 uint32_t efd_flags;
100 struct selinfo efd_sel;
101 struct mtx efd_lock;
102 };
103
104 int
eventfd_create_file(struct thread * td,struct file * fp,uint32_t initval,int flags)105 eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
106 int flags)
107 {
108 struct eventfd *efd;
109 int fflags;
110
111 AUDIT_ARG_FFLAGS(flags);
112 AUDIT_ARG_VALUE(initval);
113
114 efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO);
115 efd->efd_flags = flags;
116 efd->efd_count = initval;
117 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
118 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock);
119
120 fflags = FREAD | FWRITE;
121 if ((flags & EFD_NONBLOCK) != 0)
122 fflags |= FNONBLOCK;
123 finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops);
124
125 return (0);
126 }
127
128 static int
eventfd_close(struct file * fp,struct thread * td)129 eventfd_close(struct file *fp, struct thread *td)
130 {
131 struct eventfd *efd;
132
133 efd = fp->f_data;
134 seldrain(&efd->efd_sel);
135 knlist_destroy(&efd->efd_sel.si_note);
136 mtx_destroy(&efd->efd_lock);
137 free(efd, M_EVENTFD);
138 return (0);
139 }
140
141 static int
eventfd_read(struct file * fp,struct uio * uio,struct ucred * active_cred,int flags,struct thread * td)142 eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
143 int flags, struct thread *td)
144 {
145 struct eventfd *efd;
146 eventfd_t count;
147 int error;
148
149 if (uio->uio_resid < sizeof(eventfd_t))
150 return (EINVAL);
151
152 error = 0;
153 efd = fp->f_data;
154 mtx_lock(&efd->efd_lock);
155 while (error == 0 && efd->efd_count == 0) {
156 if ((fp->f_flag & FNONBLOCK) != 0) {
157 mtx_unlock(&efd->efd_lock);
158 return (EAGAIN);
159 }
160 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH,
161 "efdrd", 0);
162 }
163 if (error == 0) {
164 MPASS(efd->efd_count > 0);
165 if ((efd->efd_flags & EFD_SEMAPHORE) != 0) {
166 count = 1;
167 --efd->efd_count;
168 } else {
169 count = efd->efd_count;
170 efd->efd_count = 0;
171 }
172 KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
173 selwakeup(&efd->efd_sel);
174 wakeup(&efd->efd_count);
175 mtx_unlock(&efd->efd_lock);
176 error = uiomove(&count, sizeof(eventfd_t), uio);
177 } else
178 mtx_unlock(&efd->efd_lock);
179
180 return (error);
181 }
182
183 static int
eventfd_write(struct file * fp,struct uio * uio,struct ucred * active_cred,int flags,struct thread * td)184 eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
185 int flags, struct thread *td)
186 {
187 struct eventfd *efd;
188 eventfd_t count;
189 int error;
190
191 if (uio->uio_resid < sizeof(eventfd_t))
192 return (EINVAL);
193
194 error = uiomove(&count, sizeof(eventfd_t), uio);
195 if (error != 0)
196 return (error);
197 if (count == UINT64_MAX)
198 return (EINVAL);
199
200 efd = fp->f_data;
201 mtx_lock(&efd->efd_lock);
202 retry:
203 if (UINT64_MAX - efd->efd_count <= count) {
204 if ((fp->f_flag & FNONBLOCK) != 0) {
205 mtx_unlock(&efd->efd_lock);
206 /* Do not not return the number of bytes written */
207 uio->uio_resid += sizeof(eventfd_t);
208 return (EAGAIN);
209 }
210 error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
211 PCATCH, "efdwr", 0);
212 if (error == 0)
213 goto retry;
214 }
215 if (error == 0) {
216 MPASS(UINT64_MAX - efd->efd_count > count);
217 efd->efd_count += count;
218 KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
219 selwakeup(&efd->efd_sel);
220 wakeup(&efd->efd_count);
221 }
222 mtx_unlock(&efd->efd_lock);
223
224 return (error);
225 }
226
227 static int
eventfd_poll(struct file * fp,int events,struct ucred * active_cred,struct thread * td)228 eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
229 struct thread *td)
230 {
231 struct eventfd *efd;
232 int revents;
233
234 efd = fp->f_data;
235 revents = 0;
236 mtx_lock(&efd->efd_lock);
237 if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0)
238 revents |= events & (POLLIN | POLLRDNORM);
239 if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 >
240 efd->efd_count)
241 revents |= events & (POLLOUT | POLLWRNORM);
242 if (revents == 0)
243 selrecord(td, &efd->efd_sel);
244 mtx_unlock(&efd->efd_lock);
245
246 return (revents);
247 }
248
249 static int
eventfd_kqfilter(struct file * fp,struct knote * kn)250 eventfd_kqfilter(struct file *fp, struct knote *kn)
251 {
252 struct eventfd *efd = fp->f_data;
253
254 mtx_lock(&efd->efd_lock);
255 switch (kn->kn_filter) {
256 case EVFILT_READ:
257 kn->kn_fop = &eventfd_rfiltops;
258 break;
259 case EVFILT_WRITE:
260 kn->kn_fop = &eventfd_wfiltops;
261 break;
262 default:
263 mtx_unlock(&efd->efd_lock);
264 return (EINVAL);
265 }
266
267 kn->kn_hook = efd;
268 knlist_add(&efd->efd_sel.si_note, kn, 1);
269 mtx_unlock(&efd->efd_lock);
270
271 return (0);
272 }
273
274 static void
filt_eventfddetach(struct knote * kn)275 filt_eventfddetach(struct knote *kn)
276 {
277 struct eventfd *efd = kn->kn_hook;
278
279 mtx_lock(&efd->efd_lock);
280 knlist_remove(&efd->efd_sel.si_note, kn, 1);
281 mtx_unlock(&efd->efd_lock);
282 }
283
284 static int
filt_eventfdread(struct knote * kn,long hint)285 filt_eventfdread(struct knote *kn, long hint)
286 {
287 struct eventfd *efd = kn->kn_hook;
288 int ret;
289
290 mtx_assert(&efd->efd_lock, MA_OWNED);
291 kn->kn_data = (int64_t)efd->efd_count;
292 ret = efd->efd_count > 0;
293
294 return (ret);
295 }
296
297 static int
filt_eventfdwrite(struct knote * kn,long hint)298 filt_eventfdwrite(struct knote *kn, long hint)
299 {
300 struct eventfd *efd = kn->kn_hook;
301 int ret;
302
303 mtx_assert(&efd->efd_lock, MA_OWNED);
304 kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count);
305 ret = UINT64_MAX - 1 > efd->efd_count;
306
307 return (ret);
308 }
309
310 static int
eventfd_ioctl(struct file * fp,u_long cmd,void * data,struct ucred * active_cred,struct thread * td)311 eventfd_ioctl(struct file *fp, u_long cmd, void *data,
312 struct ucred *active_cred, struct thread *td)
313 {
314 switch (cmd) {
315 case FIONBIO:
316 case FIOASYNC:
317 return (0);
318 }
319
320 return (ENOTTY);
321 }
322
323 static int
eventfd_stat(struct file * fp,struct stat * st,struct ucred * active_cred)324 eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
325 {
326 bzero((void *)st, sizeof *st);
327 st->st_mode = S_IFIFO;
328 return (0);
329 }
330
331 static int
eventfd_fill_kinfo(struct file * fp,struct kinfo_file * kif,struct filedesc * fdp)332 eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
333 {
334 struct eventfd *efd = fp->f_data;
335
336 kif->kf_type = KF_TYPE_EVENTFD;
337 mtx_lock(&efd->efd_lock);
338 kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count;
339 kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags;
340 kif->kf_un.kf_eventfd.kf_eventfd_addr = (uintptr_t)efd;
341 mtx_unlock(&efd->efd_lock);
342 return (0);
343 }
344