1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/systm.h> 29 #include <sys/event.h> 30 #include <sys/eventfd.h> 31 #include <sys/errno.h> 32 #include <sys/fcntl.h> 33 #include <sys/file.h> 34 #include <sys/filedesc.h> 35 #include <sys/filio.h> 36 #include <sys/kernel.h> 37 #include <sys/limits.h> 38 #include <sys/lock.h> 39 #include <sys/malloc.h> 40 #include <sys/mutex.h> 41 #include <sys/poll.h> 42 #include <sys/proc.h> 43 #include <sys/selinfo.h> 44 #include <sys/stat.h> 45 #include <sys/uio.h> 46 #include <sys/user.h> 47 48 #include <security/audit/audit.h> 49 50 _Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC"); 51 _Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK"); 52 53 MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures"); 54 55 static fo_rdwr_t eventfd_read; 56 static fo_rdwr_t eventfd_write; 57 static fo_ioctl_t eventfd_ioctl; 58 static fo_poll_t eventfd_poll; 59 static fo_kqfilter_t eventfd_kqfilter; 60 static fo_stat_t eventfd_stat; 61 static fo_close_t eventfd_close; 62 static fo_fill_kinfo_t eventfd_fill_kinfo; 63 64 static const struct fileops eventfdops = { 65 .fo_read = eventfd_read, 66 .fo_write = eventfd_write, 67 .fo_truncate = invfo_truncate, 68 .fo_ioctl = eventfd_ioctl, 69 .fo_poll = eventfd_poll, 70 .fo_kqfilter = eventfd_kqfilter, 71 .fo_stat = eventfd_stat, 72 .fo_close = eventfd_close, 73 .fo_chmod = invfo_chmod, 74 .fo_chown = invfo_chown, 75 .fo_sendfile = invfo_sendfile, 76 .fo_fill_kinfo = eventfd_fill_kinfo, 77 .fo_cmp = file_kcmp_generic, 78 .fo_flags = DFLAG_PASSABLE 79 }; 80 81 static void filt_eventfddetach(struct knote *kn); 82 static int filt_eventfdread(struct knote *kn, long hint); 83 static int filt_eventfdwrite(struct knote *kn, long hint); 84 85 static const struct filterops eventfd_rfiltops = { 86 .f_isfd = 1, 87 .f_detach = filt_eventfddetach, 88 .f_event = filt_eventfdread, 89 .f_copy = knote_triv_copy, 90 }; 91 92 93 static const struct filterops eventfd_wfiltops = { 94 .f_isfd = 1, 95 .f_detach = filt_eventfddetach, 96 .f_event = filt_eventfdwrite, 97 .f_copy = knote_triv_copy, 98 }; 99 100 struct eventfd { 101 eventfd_t efd_count; 102 uint32_t efd_flags; 103 struct selinfo efd_sel; 104 struct mtx efd_lock; 105 }; 106 107 int 108 eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval, 109 int flags) 110 { 111 struct eventfd *efd; 112 int fflags; 113 114 AUDIT_ARG_FFLAGS(flags); 115 AUDIT_ARG_VALUE(initval); 116 117 efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO); 118 efd->efd_flags = flags; 119 efd->efd_count = initval; 120 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 121 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 122 123 fflags = FREAD | FWRITE; 124 if ((flags & EFD_NONBLOCK) != 0) 125 fflags |= FNONBLOCK; 126 finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops); 127 128 return (0); 129 } 130 131 static int 132 eventfd_close(struct file *fp, struct thread *td) 133 { 134 struct eventfd *efd; 135 136 efd = fp->f_data; 137 seldrain(&efd->efd_sel); 138 knlist_destroy(&efd->efd_sel.si_note); 139 mtx_destroy(&efd->efd_lock); 140 free(efd, M_EVENTFD); 141 return (0); 142 } 143 144 static int 145 eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 146 int flags, struct thread *td) 147 { 148 struct eventfd *efd; 149 eventfd_t count; 150 int error; 151 152 if (uio->uio_resid < sizeof(eventfd_t)) 153 return (EINVAL); 154 155 error = 0; 156 efd = fp->f_data; 157 mtx_lock(&efd->efd_lock); 158 while (error == 0 && efd->efd_count == 0) { 159 if ((fp->f_flag & FNONBLOCK) != 0) { 160 mtx_unlock(&efd->efd_lock); 161 return (EAGAIN); 162 } 163 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, 164 "efdrd", 0); 165 } 166 if (error == 0) { 167 MPASS(efd->efd_count > 0); 168 if ((efd->efd_flags & EFD_SEMAPHORE) != 0) { 169 count = 1; 170 --efd->efd_count; 171 } else { 172 count = efd->efd_count; 173 efd->efd_count = 0; 174 } 175 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 176 selwakeup(&efd->efd_sel); 177 wakeup(&efd->efd_count); 178 mtx_unlock(&efd->efd_lock); 179 error = uiomove(&count, sizeof(eventfd_t), uio); 180 } else 181 mtx_unlock(&efd->efd_lock); 182 183 return (error); 184 } 185 186 static int 187 eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 188 int flags, struct thread *td) 189 { 190 struct eventfd *efd; 191 eventfd_t count; 192 int error; 193 194 if (uio->uio_resid < sizeof(eventfd_t)) 195 return (EINVAL); 196 197 error = uiomove(&count, sizeof(eventfd_t), uio); 198 if (error != 0) 199 return (error); 200 if (count == UINT64_MAX) 201 return (EINVAL); 202 203 efd = fp->f_data; 204 mtx_lock(&efd->efd_lock); 205 retry: 206 if (UINT64_MAX - efd->efd_count <= count) { 207 if ((fp->f_flag & FNONBLOCK) != 0) { 208 mtx_unlock(&efd->efd_lock); 209 /* Do not not return the number of bytes written */ 210 uio->uio_resid += sizeof(eventfd_t); 211 return (EAGAIN); 212 } 213 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 214 PCATCH, "efdwr", 0); 215 if (error == 0) 216 goto retry; 217 } 218 if (error == 0) { 219 MPASS(UINT64_MAX - efd->efd_count > count); 220 efd->efd_count += count; 221 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 222 selwakeup(&efd->efd_sel); 223 wakeup(&efd->efd_count); 224 } 225 mtx_unlock(&efd->efd_lock); 226 227 return (error); 228 } 229 230 static int 231 eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 232 struct thread *td) 233 { 234 struct eventfd *efd; 235 int revents; 236 237 efd = fp->f_data; 238 revents = 0; 239 mtx_lock(&efd->efd_lock); 240 if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0) 241 revents |= events & (POLLIN | POLLRDNORM); 242 if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 > 243 efd->efd_count) 244 revents |= events & (POLLOUT | POLLWRNORM); 245 if (revents == 0) 246 selrecord(td, &efd->efd_sel); 247 mtx_unlock(&efd->efd_lock); 248 249 return (revents); 250 } 251 252 static int 253 eventfd_kqfilter(struct file *fp, struct knote *kn) 254 { 255 struct eventfd *efd = fp->f_data; 256 257 mtx_lock(&efd->efd_lock); 258 switch (kn->kn_filter) { 259 case EVFILT_READ: 260 kn->kn_fop = &eventfd_rfiltops; 261 break; 262 case EVFILT_WRITE: 263 kn->kn_fop = &eventfd_wfiltops; 264 break; 265 default: 266 mtx_unlock(&efd->efd_lock); 267 return (EINVAL); 268 } 269 270 kn->kn_hook = efd; 271 knlist_add(&efd->efd_sel.si_note, kn, 1); 272 mtx_unlock(&efd->efd_lock); 273 274 return (0); 275 } 276 277 static void 278 filt_eventfddetach(struct knote *kn) 279 { 280 struct eventfd *efd = kn->kn_hook; 281 282 mtx_lock(&efd->efd_lock); 283 knlist_remove(&efd->efd_sel.si_note, kn, 1); 284 mtx_unlock(&efd->efd_lock); 285 } 286 287 static int 288 filt_eventfdread(struct knote *kn, long hint) 289 { 290 struct eventfd *efd = kn->kn_hook; 291 int ret; 292 293 mtx_assert(&efd->efd_lock, MA_OWNED); 294 kn->kn_data = (int64_t)efd->efd_count; 295 ret = efd->efd_count > 0; 296 297 return (ret); 298 } 299 300 static int 301 filt_eventfdwrite(struct knote *kn, long hint) 302 { 303 struct eventfd *efd = kn->kn_hook; 304 int ret; 305 306 mtx_assert(&efd->efd_lock, MA_OWNED); 307 kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count); 308 ret = UINT64_MAX - 1 > efd->efd_count; 309 310 return (ret); 311 } 312 313 static int 314 eventfd_ioctl(struct file *fp, u_long cmd, void *data, 315 struct ucred *active_cred, struct thread *td) 316 { 317 switch (cmd) { 318 case FIONBIO: 319 case FIOASYNC: 320 return (0); 321 } 322 323 return (ENOTTY); 324 } 325 326 static int 327 eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred) 328 { 329 bzero((void *)st, sizeof *st); 330 st->st_mode = S_IFIFO; 331 return (0); 332 } 333 334 static int 335 eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 336 { 337 struct eventfd *efd = fp->f_data; 338 339 kif->kf_type = KF_TYPE_EVENTFD; 340 mtx_lock(&efd->efd_lock); 341 kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count; 342 kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags; 343 kif->kf_un.kf_eventfd.kf_eventfd_addr = (uintptr_t)efd; 344 mtx_unlock(&efd->efd_lock); 345 return (0); 346 } 347