1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2007 Roman Divacky 5 * Copyright (c) 2014 Dmitry Chagin 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/malloc.h> 37 #include <sys/limits.h> 38 #include <sys/lock.h> 39 #include <sys/mutex.h> 40 #include <sys/types.h> 41 #include <sys/user.h> 42 #include <sys/fcntl.h> 43 #include <sys/file.h> 44 #include <sys/filedesc.h> 45 #include <sys/filio.h> 46 #include <sys/stat.h> 47 #include <sys/errno.h> 48 #include <sys/event.h> 49 #include <sys/poll.h> 50 #include <sys/proc.h> 51 #include <sys/uio.h> 52 #include <sys/selinfo.h> 53 #include <sys/eventfd.h> 54 55 #include <security/audit/audit.h> 56 57 _Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC"); 58 _Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK"); 59 60 MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures"); 61 62 static fo_rdwr_t eventfd_read; 63 static fo_rdwr_t eventfd_write; 64 static fo_ioctl_t eventfd_ioctl; 65 static fo_poll_t eventfd_poll; 66 static fo_kqfilter_t eventfd_kqfilter; 67 static fo_stat_t eventfd_stat; 68 static fo_close_t eventfd_close; 69 static fo_fill_kinfo_t eventfd_fill_kinfo; 70 71 static struct fileops eventfdops = { 72 .fo_read = eventfd_read, 73 .fo_write = eventfd_write, 74 .fo_truncate = invfo_truncate, 75 .fo_ioctl = eventfd_ioctl, 76 .fo_poll = eventfd_poll, 77 .fo_kqfilter = eventfd_kqfilter, 78 .fo_stat = eventfd_stat, 79 .fo_close = eventfd_close, 80 .fo_chmod = invfo_chmod, 81 .fo_chown = invfo_chown, 82 .fo_sendfile = invfo_sendfile, 83 .fo_fill_kinfo = eventfd_fill_kinfo, 84 .fo_flags = DFLAG_PASSABLE 85 }; 86 87 static void filt_eventfddetach(struct knote *kn); 88 static int filt_eventfdread(struct knote *kn, long hint); 89 static int filt_eventfdwrite(struct knote *kn, long hint); 90 91 static struct filterops eventfd_rfiltops = { 92 .f_isfd = 1, 93 .f_detach = filt_eventfddetach, 94 .f_event = filt_eventfdread 95 }; 96 97 static struct filterops eventfd_wfiltops = { 98 .f_isfd = 1, 99 .f_detach = filt_eventfddetach, 100 .f_event = filt_eventfdwrite 101 }; 102 103 struct eventfd { 104 eventfd_t efd_count; 105 uint32_t efd_flags; 106 struct selinfo efd_sel; 107 struct mtx efd_lock; 108 }; 109 110 int 111 eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval, 112 int flags) 113 { 114 struct eventfd *efd; 115 int fflags; 116 117 AUDIT_ARG_FFLAGS(flags); 118 AUDIT_ARG_VALUE(initval); 119 120 efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO); 121 efd->efd_flags = flags; 122 efd->efd_count = initval; 123 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 124 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 125 126 fflags = FREAD | FWRITE; 127 if ((flags & EFD_NONBLOCK) != 0) 128 fflags |= FNONBLOCK; 129 finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops); 130 131 return (0); 132 } 133 134 static int 135 eventfd_close(struct file *fp, struct thread *td) 136 { 137 struct eventfd *efd; 138 139 efd = fp->f_data; 140 seldrain(&efd->efd_sel); 141 knlist_destroy(&efd->efd_sel.si_note); 142 mtx_destroy(&efd->efd_lock); 143 free(efd, M_EVENTFD); 144 return (0); 145 } 146 147 static int 148 eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 149 int flags, struct thread *td) 150 { 151 struct eventfd *efd; 152 eventfd_t count; 153 int error; 154 155 if (uio->uio_resid < sizeof(eventfd_t)) 156 return (EINVAL); 157 158 error = 0; 159 efd = fp->f_data; 160 mtx_lock(&efd->efd_lock); 161 while (error == 0 && efd->efd_count == 0) { 162 if ((fp->f_flag & FNONBLOCK) != 0) { 163 mtx_unlock(&efd->efd_lock); 164 return (EAGAIN); 165 } 166 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, 167 "efdrd", 0); 168 } 169 if (error == 0) { 170 MPASS(efd->efd_count > 0); 171 if ((efd->efd_flags & EFD_SEMAPHORE) != 0) { 172 count = 1; 173 --efd->efd_count; 174 } else { 175 count = efd->efd_count; 176 efd->efd_count = 0; 177 } 178 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 179 selwakeup(&efd->efd_sel); 180 wakeup(&efd->efd_count); 181 mtx_unlock(&efd->efd_lock); 182 error = uiomove(&count, sizeof(eventfd_t), uio); 183 } else 184 mtx_unlock(&efd->efd_lock); 185 186 return (error); 187 } 188 189 static int 190 eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 191 int flags, struct thread *td) 192 { 193 struct eventfd *efd; 194 eventfd_t count; 195 int error; 196 197 if (uio->uio_resid < sizeof(eventfd_t)) 198 return (EINVAL); 199 200 error = uiomove(&count, sizeof(eventfd_t), uio); 201 if (error != 0) 202 return (error); 203 if (count == UINT64_MAX) 204 return (EINVAL); 205 206 efd = fp->f_data; 207 mtx_lock(&efd->efd_lock); 208 retry: 209 if (UINT64_MAX - efd->efd_count <= count) { 210 if ((fp->f_flag & FNONBLOCK) != 0) { 211 mtx_unlock(&efd->efd_lock); 212 /* Do not not return the number of bytes written */ 213 uio->uio_resid += sizeof(eventfd_t); 214 return (EAGAIN); 215 } 216 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 217 PCATCH, "efdwr", 0); 218 if (error == 0) 219 goto retry; 220 } 221 if (error == 0) { 222 MPASS(UINT64_MAX - efd->efd_count > count); 223 efd->efd_count += count; 224 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 225 selwakeup(&efd->efd_sel); 226 wakeup(&efd->efd_count); 227 } 228 mtx_unlock(&efd->efd_lock); 229 230 return (error); 231 } 232 233 static int 234 eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 235 struct thread *td) 236 { 237 struct eventfd *efd; 238 int revents; 239 240 efd = fp->f_data; 241 revents = 0; 242 mtx_lock(&efd->efd_lock); 243 if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0) 244 revents |= events & (POLLIN | POLLRDNORM); 245 if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 > 246 efd->efd_count) 247 revents |= events & (POLLOUT | POLLWRNORM); 248 if (revents == 0) 249 selrecord(td, &efd->efd_sel); 250 mtx_unlock(&efd->efd_lock); 251 252 return (revents); 253 } 254 255 static int 256 eventfd_kqfilter(struct file *fp, struct knote *kn) 257 { 258 struct eventfd *efd = fp->f_data; 259 260 mtx_lock(&efd->efd_lock); 261 switch (kn->kn_filter) { 262 case EVFILT_READ: 263 kn->kn_fop = &eventfd_rfiltops; 264 break; 265 case EVFILT_WRITE: 266 kn->kn_fop = &eventfd_wfiltops; 267 break; 268 default: 269 mtx_unlock(&efd->efd_lock); 270 return (EINVAL); 271 } 272 273 kn->kn_hook = efd; 274 knlist_add(&efd->efd_sel.si_note, kn, 1); 275 mtx_unlock(&efd->efd_lock); 276 277 return (0); 278 } 279 280 static void 281 filt_eventfddetach(struct knote *kn) 282 { 283 struct eventfd *efd = kn->kn_hook; 284 285 mtx_lock(&efd->efd_lock); 286 knlist_remove(&efd->efd_sel.si_note, kn, 1); 287 mtx_unlock(&efd->efd_lock); 288 } 289 290 static int 291 filt_eventfdread(struct knote *kn, long hint) 292 { 293 struct eventfd *efd = kn->kn_hook; 294 int ret; 295 296 mtx_assert(&efd->efd_lock, MA_OWNED); 297 kn->kn_data = (int64_t)efd->efd_count; 298 ret = efd->efd_count > 0; 299 300 return (ret); 301 } 302 303 static int 304 filt_eventfdwrite(struct knote *kn, long hint) 305 { 306 struct eventfd *efd = kn->kn_hook; 307 int ret; 308 309 mtx_assert(&efd->efd_lock, MA_OWNED); 310 kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count); 311 ret = UINT64_MAX - 1 > efd->efd_count; 312 313 return (ret); 314 } 315 316 static int 317 eventfd_ioctl(struct file *fp, u_long cmd, void *data, 318 struct ucred *active_cred, struct thread *td) 319 { 320 switch (cmd) { 321 case FIONBIO: 322 case FIOASYNC: 323 return (0); 324 } 325 326 return (ENOTTY); 327 } 328 329 static int 330 eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, 331 struct thread *td) 332 { 333 bzero((void *)st, sizeof *st); 334 st->st_mode = S_IFIFO; 335 return (0); 336 } 337 338 static int 339 eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 340 { 341 struct eventfd *efd = fp->f_data; 342 343 kif->kf_type = KF_TYPE_EVENTFD; 344 mtx_lock(&efd->efd_lock); 345 kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count; 346 kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags; 347 mtx_unlock(&efd->efd_lock); 348 return (0); 349 } 350