1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/systm.h> 29 #include <sys/event.h> 30 #include <sys/eventfd.h> 31 #include <sys/errno.h> 32 #include <sys/fcntl.h> 33 #include <sys/file.h> 34 #include <sys/filedesc.h> 35 #include <sys/filio.h> 36 #include <sys/kernel.h> 37 #include <sys/limits.h> 38 #include <sys/lock.h> 39 #include <sys/malloc.h> 40 #include <sys/mutex.h> 41 #include <sys/poll.h> 42 #include <sys/proc.h> 43 #include <sys/refcount.h> 44 #include <sys/selinfo.h> 45 #include <sys/stat.h> 46 #include <sys/uio.h> 47 #include <sys/user.h> 48 49 #include <security/audit/audit.h> 50 51 _Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC"); 52 _Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK"); 53 54 MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures"); 55 56 static fo_rdwr_t eventfd_read; 57 static fo_rdwr_t eventfd_write; 58 static fo_ioctl_t eventfd_ioctl; 59 static fo_poll_t eventfd_poll; 60 static fo_kqfilter_t eventfd_kqfilter; 61 static fo_stat_t eventfd_stat; 62 static fo_close_t eventfd_close; 63 static fo_fill_kinfo_t eventfd_fill_kinfo; 64 65 static const struct fileops eventfdops = { 66 .fo_read = eventfd_read, 67 .fo_write = eventfd_write, 68 .fo_truncate = invfo_truncate, 69 .fo_ioctl = eventfd_ioctl, 70 .fo_poll = eventfd_poll, 71 .fo_kqfilter = eventfd_kqfilter, 72 .fo_stat = eventfd_stat, 73 .fo_close = eventfd_close, 74 .fo_chmod = invfo_chmod, 75 .fo_chown = invfo_chown, 76 .fo_sendfile = invfo_sendfile, 77 .fo_fill_kinfo = eventfd_fill_kinfo, 78 .fo_cmp = file_kcmp_generic, 79 .fo_flags = DFLAG_PASSABLE 80 }; 81 82 static void filt_eventfddetach(struct knote *kn); 83 static int filt_eventfdread(struct knote *kn, long hint); 84 static int filt_eventfdwrite(struct knote *kn, long hint); 85 86 static const struct filterops eventfd_rfiltops = { 87 .f_isfd = 1, 88 .f_detach = filt_eventfddetach, 89 .f_event = filt_eventfdread, 90 .f_copy = knote_triv_copy, 91 }; 92 93 94 static const struct filterops eventfd_wfiltops = { 95 .f_isfd = 1, 96 .f_detach = filt_eventfddetach, 97 .f_event = filt_eventfdwrite, 98 .f_copy = knote_triv_copy, 99 }; 100 101 struct eventfd { 102 eventfd_t efd_count; 103 uint32_t efd_flags; 104 struct selinfo efd_sel; 105 struct mtx efd_lock; 106 unsigned int efd_refcount; 107 }; 108 109 int 110 eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval, 111 int flags) 112 { 113 struct eventfd *efd; 114 int fflags; 115 116 AUDIT_ARG_FFLAGS(flags); 117 AUDIT_ARG_VALUE(initval); 118 119 efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO); 120 efd->efd_flags = flags; 121 efd->efd_count = initval; 122 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 123 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 124 refcount_init(&efd->efd_refcount, 1); 125 126 fflags = FREAD | FWRITE; 127 if ((flags & EFD_NONBLOCK) != 0) 128 fflags |= FNONBLOCK; 129 finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops); 130 131 return (0); 132 } 133 134 struct eventfd * 135 eventfd_get(struct file *fp) 136 { 137 struct eventfd *efd; 138 139 if (fp->f_data == NULL || fp->f_ops != &eventfdops) 140 return (NULL); 141 142 efd = fp->f_data; 143 refcount_acquire(&efd->efd_refcount); 144 145 return (efd); 146 } 147 148 void 149 eventfd_put(struct eventfd *efd) 150 { 151 if (!refcount_release(&efd->efd_refcount)) 152 return; 153 154 seldrain(&efd->efd_sel); 155 knlist_destroy(&efd->efd_sel.si_note); 156 mtx_destroy(&efd->efd_lock); 157 free(efd, M_EVENTFD); 158 } 159 160 static void 161 eventfd_wakeup(struct eventfd *efd) 162 { 163 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 164 selwakeup(&efd->efd_sel); 165 wakeup(&efd->efd_count); 166 } 167 168 void 169 eventfd_signal(struct eventfd *efd) 170 { 171 mtx_lock(&efd->efd_lock); 172 173 if (efd->efd_count < UINT64_MAX) 174 efd->efd_count++; 175 176 eventfd_wakeup(efd); 177 178 mtx_unlock(&efd->efd_lock); 179 } 180 181 static int 182 eventfd_close(struct file *fp, struct thread *td) 183 { 184 struct eventfd *efd; 185 186 efd = fp->f_data; 187 eventfd_put(efd); 188 return (0); 189 } 190 191 static int 192 eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 193 int flags, struct thread *td) 194 { 195 struct eventfd *efd; 196 eventfd_t count; 197 int error; 198 199 if (uio->uio_resid < sizeof(eventfd_t)) 200 return (EINVAL); 201 202 error = 0; 203 efd = fp->f_data; 204 mtx_lock(&efd->efd_lock); 205 while (error == 0 && efd->efd_count == 0) { 206 if ((fp->f_flag & FNONBLOCK) != 0) { 207 mtx_unlock(&efd->efd_lock); 208 return (EAGAIN); 209 } 210 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, 211 "efdrd", 0); 212 } 213 if (error == 0) { 214 MPASS(efd->efd_count > 0); 215 if ((efd->efd_flags & EFD_SEMAPHORE) != 0) { 216 count = 1; 217 --efd->efd_count; 218 } else { 219 count = efd->efd_count; 220 efd->efd_count = 0; 221 } 222 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 223 selwakeup(&efd->efd_sel); 224 wakeup(&efd->efd_count); 225 mtx_unlock(&efd->efd_lock); 226 error = uiomove(&count, sizeof(eventfd_t), uio); 227 } else 228 mtx_unlock(&efd->efd_lock); 229 230 return (error); 231 } 232 233 static int 234 eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 235 int flags, struct thread *td) 236 { 237 struct eventfd *efd; 238 eventfd_t count; 239 int error; 240 241 if (uio->uio_resid < sizeof(eventfd_t)) 242 return (EINVAL); 243 244 error = uiomove(&count, sizeof(eventfd_t), uio); 245 if (error != 0) 246 return (error); 247 if (count == UINT64_MAX) 248 return (EINVAL); 249 250 efd = fp->f_data; 251 mtx_lock(&efd->efd_lock); 252 retry: 253 if (UINT64_MAX - efd->efd_count <= count) { 254 if ((fp->f_flag & FNONBLOCK) != 0) { 255 mtx_unlock(&efd->efd_lock); 256 /* Do not not return the number of bytes written */ 257 uio->uio_resid += sizeof(eventfd_t); 258 return (EAGAIN); 259 } 260 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 261 PCATCH, "efdwr", 0); 262 if (error == 0) 263 goto retry; 264 } 265 if (error == 0) { 266 MPASS(UINT64_MAX - efd->efd_count > count); 267 efd->efd_count += count; 268 eventfd_wakeup(efd); 269 } 270 mtx_unlock(&efd->efd_lock); 271 272 return (error); 273 } 274 275 static int 276 eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 277 struct thread *td) 278 { 279 struct eventfd *efd; 280 int revents; 281 282 efd = fp->f_data; 283 revents = 0; 284 mtx_lock(&efd->efd_lock); 285 if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0) 286 revents |= events & (POLLIN | POLLRDNORM); 287 if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 > 288 efd->efd_count) 289 revents |= events & (POLLOUT | POLLWRNORM); 290 if (revents == 0) 291 selrecord(td, &efd->efd_sel); 292 mtx_unlock(&efd->efd_lock); 293 294 return (revents); 295 } 296 297 static int 298 eventfd_kqfilter(struct file *fp, struct knote *kn) 299 { 300 struct eventfd *efd = fp->f_data; 301 302 mtx_lock(&efd->efd_lock); 303 switch (kn->kn_filter) { 304 case EVFILT_READ: 305 kn->kn_fop = &eventfd_rfiltops; 306 break; 307 case EVFILT_WRITE: 308 kn->kn_fop = &eventfd_wfiltops; 309 break; 310 default: 311 mtx_unlock(&efd->efd_lock); 312 return (EINVAL); 313 } 314 315 kn->kn_hook = efd; 316 knlist_add(&efd->efd_sel.si_note, kn, 1); 317 mtx_unlock(&efd->efd_lock); 318 319 return (0); 320 } 321 322 static void 323 filt_eventfddetach(struct knote *kn) 324 { 325 struct eventfd *efd = kn->kn_hook; 326 327 mtx_lock(&efd->efd_lock); 328 knlist_remove(&efd->efd_sel.si_note, kn, 1); 329 mtx_unlock(&efd->efd_lock); 330 } 331 332 static int 333 filt_eventfdread(struct knote *kn, long hint) 334 { 335 struct eventfd *efd = kn->kn_hook; 336 int ret; 337 338 mtx_assert(&efd->efd_lock, MA_OWNED); 339 kn->kn_data = (int64_t)efd->efd_count; 340 ret = efd->efd_count > 0; 341 342 return (ret); 343 } 344 345 static int 346 filt_eventfdwrite(struct knote *kn, long hint) 347 { 348 struct eventfd *efd = kn->kn_hook; 349 int ret; 350 351 mtx_assert(&efd->efd_lock, MA_OWNED); 352 kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count); 353 ret = UINT64_MAX - 1 > efd->efd_count; 354 355 return (ret); 356 } 357 358 static int 359 eventfd_ioctl(struct file *fp, u_long cmd, void *data, 360 struct ucred *active_cred, struct thread *td) 361 { 362 switch (cmd) { 363 case FIONBIO: 364 case FIOASYNC: 365 return (0); 366 } 367 368 return (ENOTTY); 369 } 370 371 static int 372 eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred) 373 { 374 bzero((void *)st, sizeof *st); 375 st->st_mode = S_IFIFO; 376 return (0); 377 } 378 379 static int 380 eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 381 { 382 struct eventfd *efd = fp->f_data; 383 384 kif->kf_type = KF_TYPE_EVENTFD; 385 mtx_lock(&efd->efd_lock); 386 kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count; 387 kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags; 388 kif->kf_un.kf_eventfd.kf_eventfd_addr = (uintptr_t)efd; 389 mtx_unlock(&efd->efd_lock); 390 return (0); 391 } 392