/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 197a202823SKonstantin Belousov * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 207a202823SKonstantin Belousov * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 217a202823SKonstantin Belousov * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 227a202823SKonstantin Belousov * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 237a202823SKonstantin Belousov * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 247a202823SKonstantin Belousov * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 257a202823SKonstantin Belousov * SUCH DAMAGE. 267a202823SKonstantin Belousov */ 277a202823SKonstantin Belousov 287a202823SKonstantin Belousov #include <sys/cdefs.h> 297a202823SKonstantin Belousov __FBSDID("$FreeBSD$"); 307a202823SKonstantin Belousov 317a202823SKonstantin Belousov #include <sys/param.h> 327a202823SKonstantin Belousov #include <sys/systm.h> 337a202823SKonstantin Belousov #include <sys/kernel.h> 347a202823SKonstantin Belousov #include <sys/malloc.h> 357a202823SKonstantin Belousov #include <sys/limits.h> 367a202823SKonstantin Belousov #include <sys/lock.h> 377a202823SKonstantin Belousov #include <sys/mutex.h> 387a202823SKonstantin Belousov #include <sys/types.h> 397a202823SKonstantin Belousov #include <sys/user.h> 407a202823SKonstantin Belousov #include <sys/fcntl.h> 417a202823SKonstantin Belousov #include <sys/file.h> 427a202823SKonstantin Belousov #include <sys/filedesc.h> 437a202823SKonstantin Belousov #include <sys/filio.h> 447a202823SKonstantin Belousov #include <sys/stat.h> 457a202823SKonstantin Belousov #include <sys/errno.h> 467a202823SKonstantin Belousov #include <sys/event.h> 477a202823SKonstantin Belousov #include <sys/poll.h> 487a202823SKonstantin Belousov #include <sys/proc.h> 497a202823SKonstantin Belousov #include <sys/uio.h> 507a202823SKonstantin Belousov 
#include <sys/selinfo.h>
#include <sys/eventfd.h>

#include <security/audit/audit.h>

/*
 * eventfd_create_file() maps EFD_NONBLOCK onto the file's non-blocking flag;
 * make sure the EFD_* values really are the corresponding O_* values.
 */
_Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC");
_Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK");

/* Malloc type used for every struct eventfd allocated in this file. */
MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures");

/* File operation handlers implemented in this file. */
static fo_rdwr_t	eventfd_read;
static fo_rdwr_t	eventfd_write;
static fo_ioctl_t	eventfd_ioctl;
static fo_poll_t	eventfd_poll;
static fo_kqfilter_t	eventfd_kqfilter;
static fo_stat_t	eventfd_stat;
static fo_close_t	eventfd_close;
static fo_fill_kinfo_t	eventfd_fill_kinfo;

/*
 * File operations vector installed on eventfd descriptors by
 * eventfd_create_file().  Operations without a handler in this file are
 * routed to the invfo_*() stubs.
 */
static struct fileops eventfdops = {
	.fo_read = eventfd_read,
	.fo_write = eventfd_write,
	.fo_truncate = invfo_truncate,
	.fo_ioctl = eventfd_ioctl,
	.fo_poll = eventfd_poll,
	.fo_kqfilter = eventfd_kqfilter,
	.fo_stat = eventfd_stat,
	.fo_close = eventfd_close,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
	.fo_fill_kinfo = eventfd_fill_kinfo,
	.fo_flags = DFLAG_PASSABLE
};
847a202823SKonstantin Belousov 857a202823SKonstantin Belousov static void filt_eventfddetach(struct knote *kn); 867a202823SKonstantin Belousov static int filt_eventfdread(struct knote *kn, long hint); 877a202823SKonstantin Belousov static int filt_eventfdwrite(struct knote *kn, long hint); 887a202823SKonstantin Belousov 897a202823SKonstantin Belousov static struct filterops eventfd_rfiltops = { 907a202823SKonstantin Belousov .f_isfd = 1, 917a202823SKonstantin Belousov .f_detach = filt_eventfddetach, 927a202823SKonstantin Belousov .f_event = filt_eventfdread 937a202823SKonstantin Belousov }; 947a202823SKonstantin Belousov 957a202823SKonstantin Belousov static struct filterops eventfd_wfiltops = { 967a202823SKonstantin Belousov .f_isfd = 1, 977a202823SKonstantin Belousov .f_detach = filt_eventfddetach, 987a202823SKonstantin Belousov .f_event = filt_eventfdwrite 997a202823SKonstantin Belousov }; 1007a202823SKonstantin Belousov 1017a202823SKonstantin Belousov struct eventfd { 1027a202823SKonstantin Belousov eventfd_t efd_count; 1037a202823SKonstantin Belousov uint32_t efd_flags; 1047a202823SKonstantin Belousov struct selinfo efd_sel; 1057a202823SKonstantin Belousov struct mtx efd_lock; 1067a202823SKonstantin Belousov }; 1077a202823SKonstantin Belousov 1087a202823SKonstantin Belousov int 1097a202823SKonstantin Belousov eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval, 1107a202823SKonstantin Belousov int flags) 1117a202823SKonstantin Belousov { 1127a202823SKonstantin Belousov struct eventfd *efd; 1137a202823SKonstantin Belousov int fflags; 1147a202823SKonstantin Belousov 1157a202823SKonstantin Belousov AUDIT_ARG_FFLAGS(flags); 1167a202823SKonstantin Belousov AUDIT_ARG_VALUE(initval); 1177a202823SKonstantin Belousov 1187a202823SKonstantin Belousov efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO); 1197a202823SKonstantin Belousov efd->efd_flags = flags; 1207a202823SKonstantin Belousov efd->efd_count = initval; 1217a202823SKonstantin Belousov 
mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 1227a202823SKonstantin Belousov knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 1237a202823SKonstantin Belousov 1247a202823SKonstantin Belousov fflags = FREAD | FWRITE; 1257a202823SKonstantin Belousov if ((flags & EFD_NONBLOCK) != 0) 1267a202823SKonstantin Belousov fflags |= FNONBLOCK; 1277a202823SKonstantin Belousov finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops); 1287a202823SKonstantin Belousov 1297a202823SKonstantin Belousov return (0); 1307a202823SKonstantin Belousov } 1317a202823SKonstantin Belousov 1327a202823SKonstantin Belousov static int 1337a202823SKonstantin Belousov eventfd_close(struct file *fp, struct thread *td) 1347a202823SKonstantin Belousov { 1357a202823SKonstantin Belousov struct eventfd *efd; 1367a202823SKonstantin Belousov 1377a202823SKonstantin Belousov efd = fp->f_data; 1387a202823SKonstantin Belousov seldrain(&efd->efd_sel); 1397a202823SKonstantin Belousov knlist_destroy(&efd->efd_sel.si_note); 1407a202823SKonstantin Belousov mtx_destroy(&efd->efd_lock); 1417a202823SKonstantin Belousov free(efd, M_EVENTFD); 1427a202823SKonstantin Belousov return (0); 1437a202823SKonstantin Belousov } 1447a202823SKonstantin Belousov 1457a202823SKonstantin Belousov static int 1467a202823SKonstantin Belousov eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 1477a202823SKonstantin Belousov int flags, struct thread *td) 1487a202823SKonstantin Belousov { 1497a202823SKonstantin Belousov struct eventfd *efd; 1507a202823SKonstantin Belousov eventfd_t count; 1517a202823SKonstantin Belousov int error; 1527a202823SKonstantin Belousov 1537a202823SKonstantin Belousov if (uio->uio_resid < sizeof(eventfd_t)) 1547a202823SKonstantin Belousov return (EINVAL); 1557a202823SKonstantin Belousov 1567a202823SKonstantin Belousov error = 0; 1577a202823SKonstantin Belousov efd = fp->f_data; 1587a202823SKonstantin Belousov mtx_lock(&efd->efd_lock); 1597a202823SKonstantin Belousov while (error == 0 && 
efd->efd_count == 0) { 1607a202823SKonstantin Belousov if ((fp->f_flag & FNONBLOCK) != 0) { 1617a202823SKonstantin Belousov mtx_unlock(&efd->efd_lock); 1627a202823SKonstantin Belousov return (EAGAIN); 1637a202823SKonstantin Belousov } 1647a202823SKonstantin Belousov error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, 1657a202823SKonstantin Belousov "efdrd", 0); 1667a202823SKonstantin Belousov } 1677a202823SKonstantin Belousov if (error == 0) { 1687a202823SKonstantin Belousov MPASS(efd->efd_count > 0); 1697a202823SKonstantin Belousov if ((efd->efd_flags & EFD_SEMAPHORE) != 0) { 1707a202823SKonstantin Belousov count = 1; 1717a202823SKonstantin Belousov --efd->efd_count; 1727a202823SKonstantin Belousov } else { 1737a202823SKonstantin Belousov count = efd->efd_count; 1747a202823SKonstantin Belousov efd->efd_count = 0; 1757a202823SKonstantin Belousov } 1767a202823SKonstantin Belousov KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 1777a202823SKonstantin Belousov selwakeup(&efd->efd_sel); 1787a202823SKonstantin Belousov wakeup(&efd->efd_count); 1797a202823SKonstantin Belousov mtx_unlock(&efd->efd_lock); 1807a202823SKonstantin Belousov error = uiomove(&count, sizeof(eventfd_t), uio); 1817a202823SKonstantin Belousov } else 1827a202823SKonstantin Belousov mtx_unlock(&efd->efd_lock); 1837a202823SKonstantin Belousov 1847a202823SKonstantin Belousov return (error); 1857a202823SKonstantin Belousov } 1867a202823SKonstantin Belousov 1877a202823SKonstantin Belousov static int 1887a202823SKonstantin Belousov eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 1897a202823SKonstantin Belousov int flags, struct thread *td) 1907a202823SKonstantin Belousov { 1917a202823SKonstantin Belousov struct eventfd *efd; 1927a202823SKonstantin Belousov eventfd_t count; 1937a202823SKonstantin Belousov int error; 1947a202823SKonstantin Belousov 1957a202823SKonstantin Belousov if (uio->uio_resid < sizeof(eventfd_t)) 1967a202823SKonstantin Belousov return (EINVAL); 
1977a202823SKonstantin Belousov 1987a202823SKonstantin Belousov error = uiomove(&count, sizeof(eventfd_t), uio); 1997a202823SKonstantin Belousov if (error != 0) 2007a202823SKonstantin Belousov return (error); 2017a202823SKonstantin Belousov if (count == UINT64_MAX) 2027a202823SKonstantin Belousov return (EINVAL); 2037a202823SKonstantin Belousov 2047a202823SKonstantin Belousov efd = fp->f_data; 2057a202823SKonstantin Belousov mtx_lock(&efd->efd_lock); 2067a202823SKonstantin Belousov retry: 2077a202823SKonstantin Belousov if (UINT64_MAX - efd->efd_count <= count) { 2087a202823SKonstantin Belousov if ((fp->f_flag & FNONBLOCK) != 0) { 2097a202823SKonstantin Belousov mtx_unlock(&efd->efd_lock); 2107a202823SKonstantin Belousov /* Do not not return the number of bytes written */ 2117a202823SKonstantin Belousov uio->uio_resid += sizeof(eventfd_t); 2127a202823SKonstantin Belousov return (EAGAIN); 2137a202823SKonstantin Belousov } 2147a202823SKonstantin Belousov error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 2157a202823SKonstantin Belousov PCATCH, "efdwr", 0); 2167a202823SKonstantin Belousov if (error == 0) 2177a202823SKonstantin Belousov goto retry; 2187a202823SKonstantin Belousov } 2197a202823SKonstantin Belousov if (error == 0) { 2207a202823SKonstantin Belousov MPASS(UINT64_MAX - efd->efd_count > count); 2217a202823SKonstantin Belousov efd->efd_count += count; 2227a202823SKonstantin Belousov KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 2237a202823SKonstantin Belousov selwakeup(&efd->efd_sel); 2247a202823SKonstantin Belousov wakeup(&efd->efd_count); 2257a202823SKonstantin Belousov } 2267a202823SKonstantin Belousov mtx_unlock(&efd->efd_lock); 2277a202823SKonstantin Belousov 2287a202823SKonstantin Belousov return (error); 2297a202823SKonstantin Belousov } 2307a202823SKonstantin Belousov 2317a202823SKonstantin Belousov static int 2327a202823SKonstantin Belousov eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 2337a202823SKonstantin Belousov struct thread 
*td) 2347a202823SKonstantin Belousov { 2357a202823SKonstantin Belousov struct eventfd *efd; 2367a202823SKonstantin Belousov int revents; 2377a202823SKonstantin Belousov 2387a202823SKonstantin Belousov efd = fp->f_data; 2397a202823SKonstantin Belousov revents = 0; 2407a202823SKonstantin Belousov mtx_lock(&efd->efd_lock); 2417a202823SKonstantin Belousov if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0) 2427a202823SKonstantin Belousov revents |= events & (POLLIN | POLLRDNORM); 2437a202823SKonstantin Belousov if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 > 2447a202823SKonstantin Belousov efd->efd_count) 2457a202823SKonstantin Belousov revents |= events & (POLLOUT | POLLWRNORM); 2467a202823SKonstantin Belousov if (revents == 0) 2477a202823SKonstantin Belousov selrecord(td, &efd->efd_sel); 2487a202823SKonstantin Belousov mtx_unlock(&efd->efd_lock); 2497a202823SKonstantin Belousov 2507a202823SKonstantin Belousov return (revents); 2517a202823SKonstantin Belousov } 2527a202823SKonstantin Belousov 2537a202823SKonstantin Belousov static int 2547a202823SKonstantin Belousov eventfd_kqfilter(struct file *fp, struct knote *kn) 2557a202823SKonstantin Belousov { 2567a202823SKonstantin Belousov struct eventfd *efd = fp->f_data; 2577a202823SKonstantin Belousov 2587a202823SKonstantin Belousov mtx_lock(&efd->efd_lock); 2597a202823SKonstantin Belousov switch (kn->kn_filter) { 2607a202823SKonstantin Belousov case EVFILT_READ: 2617a202823SKonstantin Belousov kn->kn_fop = &eventfd_rfiltops; 2627a202823SKonstantin Belousov break; 2637a202823SKonstantin Belousov case EVFILT_WRITE: 2647a202823SKonstantin Belousov kn->kn_fop = &eventfd_wfiltops; 2657a202823SKonstantin Belousov break; 2667a202823SKonstantin Belousov default: 2677a202823SKonstantin Belousov mtx_unlock(&efd->efd_lock); 2687a202823SKonstantin Belousov return (EINVAL); 2697a202823SKonstantin Belousov } 2707a202823SKonstantin Belousov 2717a202823SKonstantin Belousov kn->kn_hook = efd; 
2727a202823SKonstantin Belousov knlist_add(&efd->efd_sel.si_note, kn, 1); 2737a202823SKonstantin Belousov mtx_unlock(&efd->efd_lock); 2747a202823SKonstantin Belousov 2757a202823SKonstantin Belousov return (0); 2767a202823SKonstantin Belousov } 2777a202823SKonstantin Belousov 2787a202823SKonstantin Belousov static void 2797a202823SKonstantin Belousov filt_eventfddetach(struct knote *kn) 2807a202823SKonstantin Belousov { 2817a202823SKonstantin Belousov struct eventfd *efd = kn->kn_hook; 2827a202823SKonstantin Belousov 2837a202823SKonstantin Belousov mtx_lock(&efd->efd_lock); 2847a202823SKonstantin Belousov knlist_remove(&efd->efd_sel.si_note, kn, 1); 2857a202823SKonstantin Belousov mtx_unlock(&efd->efd_lock); 2867a202823SKonstantin Belousov } 2877a202823SKonstantin Belousov 2887a202823SKonstantin Belousov static int 2897a202823SKonstantin Belousov filt_eventfdread(struct knote *kn, long hint) 2907a202823SKonstantin Belousov { 2917a202823SKonstantin Belousov struct eventfd *efd = kn->kn_hook; 2927a202823SKonstantin Belousov int ret; 2937a202823SKonstantin Belousov 2947a202823SKonstantin Belousov mtx_assert(&efd->efd_lock, MA_OWNED); 2957a202823SKonstantin Belousov kn->kn_data = (int64_t)efd->efd_count; 2967a202823SKonstantin Belousov ret = efd->efd_count > 0; 2977a202823SKonstantin Belousov 2987a202823SKonstantin Belousov return (ret); 2997a202823SKonstantin Belousov } 3007a202823SKonstantin Belousov 3017a202823SKonstantin Belousov static int 3027a202823SKonstantin Belousov filt_eventfdwrite(struct knote *kn, long hint) 3037a202823SKonstantin Belousov { 3047a202823SKonstantin Belousov struct eventfd *efd = kn->kn_hook; 3057a202823SKonstantin Belousov int ret; 3067a202823SKonstantin Belousov 3077a202823SKonstantin Belousov mtx_assert(&efd->efd_lock, MA_OWNED); 3087a202823SKonstantin Belousov kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count); 3097a202823SKonstantin Belousov ret = UINT64_MAX - 1 > efd->efd_count; 3107a202823SKonstantin Belousov 
3117a202823SKonstantin Belousov return (ret); 3127a202823SKonstantin Belousov } 3137a202823SKonstantin Belousov 3147a202823SKonstantin Belousov static int 3157a202823SKonstantin Belousov eventfd_ioctl(struct file *fp, u_long cmd, void *data, 3167a202823SKonstantin Belousov struct ucred *active_cred, struct thread *td) 3177a202823SKonstantin Belousov { 3187a202823SKonstantin Belousov switch (cmd) { 3197a202823SKonstantin Belousov case FIONBIO: 3207a202823SKonstantin Belousov case FIOASYNC: 3217a202823SKonstantin Belousov return (0); 3227a202823SKonstantin Belousov } 3237a202823SKonstantin Belousov 3247a202823SKonstantin Belousov return (ENOTTY); 3257a202823SKonstantin Belousov } 3267a202823SKonstantin Belousov 3277a202823SKonstantin Belousov static int 328*2b68eb8eSMateusz Guzik eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred) 3297a202823SKonstantin Belousov { 3307a202823SKonstantin Belousov bzero((void *)st, sizeof *st); 3317a202823SKonstantin Belousov st->st_mode = S_IFIFO; 3327a202823SKonstantin Belousov return (0); 3337a202823SKonstantin Belousov } 3347a202823SKonstantin Belousov 3357a202823SKonstantin Belousov static int 3367a202823SKonstantin Belousov eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 3377a202823SKonstantin Belousov { 3387a202823SKonstantin Belousov struct eventfd *efd = fp->f_data; 3397a202823SKonstantin Belousov 3407a202823SKonstantin Belousov kif->kf_type = KF_TYPE_EVENTFD; 3417a202823SKonstantin Belousov mtx_lock(&efd->efd_lock); 3427a202823SKonstantin Belousov kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count; 3437a202823SKonstantin Belousov kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags; 3447a202823SKonstantin Belousov mtx_unlock(&efd->efd_lock); 3457a202823SKonstantin Belousov return (0); 3467a202823SKonstantin Belousov } 347