1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2014 Dmitry Chagin 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/kernel.h> 34 #include <sys/malloc.h> 35 #include <sys/limits.h> 36 #include <sys/lock.h> 37 #include <sys/mutex.h> 38 #include <sys/types.h> 39 #include <sys/user.h> 40 #include <sys/fcntl.h> 41 #include <sys/file.h> 42 #include <sys/filedesc.h> 43 #include <sys/filio.h> 44 #include <sys/stat.h> 45 #include <sys/errno.h> 46 #include <sys/event.h> 47 #include <sys/poll.h> 48 #include <sys/proc.h> 49 #include <sys/uio.h> 50 #include <sys/selinfo.h> 51 #include <sys/eventfd.h> 52 53 #include <security/audit/audit.h> 54 55 _Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC"); 56 _Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK"); 57 58 MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures"); 59 60 static fo_rdwr_t eventfd_read; 61 static fo_rdwr_t eventfd_write; 62 static fo_ioctl_t eventfd_ioctl; 63 static fo_poll_t eventfd_poll; 64 static fo_kqfilter_t eventfd_kqfilter; 65 static fo_stat_t eventfd_stat; 66 static fo_close_t eventfd_close; 67 static fo_fill_kinfo_t eventfd_fill_kinfo; 68 69 static struct fileops eventfdops = { 70 .fo_read = eventfd_read, 71 .fo_write = eventfd_write, 72 .fo_truncate = invfo_truncate, 73 .fo_ioctl = eventfd_ioctl, 74 .fo_poll = eventfd_poll, 75 .fo_kqfilter = eventfd_kqfilter, 76 .fo_stat = eventfd_stat, 77 .fo_close = eventfd_close, 78 .fo_chmod = invfo_chmod, 79 .fo_chown = invfo_chown, 80 .fo_sendfile = invfo_sendfile, 81 .fo_fill_kinfo = eventfd_fill_kinfo, 82 .fo_flags = DFLAG_PASSABLE 83 }; 84 85 static void filt_eventfddetach(struct knote *kn); 86 static int filt_eventfdread(struct knote *kn, long hint); 87 static int filt_eventfdwrite(struct knote *kn, long hint); 88 89 static struct filterops eventfd_rfiltops = { 90 .f_isfd = 1, 91 .f_detach = filt_eventfddetach, 92 .f_event = filt_eventfdread 93 }; 94 95 static struct filterops eventfd_wfiltops = { 96 .f_isfd = 1, 97 .f_detach = filt_eventfddetach, 98 .f_event = filt_eventfdwrite 99 }; 100 101 struct eventfd { 102 eventfd_t efd_count; 103 uint32_t efd_flags; 104 struct selinfo efd_sel; 105 struct mtx efd_lock; 106 }; 107 108 int 109 eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval, 110 int flags) 111 { 112 struct eventfd *efd; 113 int fflags; 114 115 AUDIT_ARG_FFLAGS(flags); 116 AUDIT_ARG_VALUE(initval); 117 118 efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO); 119 efd->efd_flags = flags; 120 efd->efd_count = initval; 121 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 122 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 123 124 fflags = FREAD | FWRITE; 125 if ((flags & EFD_NONBLOCK) != 0) 126 fflags |= FNONBLOCK; 127 finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops); 128 129 return (0); 130 } 131 132 static int 133 eventfd_close(struct file *fp, struct thread *td) 134 { 135 struct eventfd *efd; 136 137 efd = fp->f_data; 138 seldrain(&efd->efd_sel); 139 knlist_destroy(&efd->efd_sel.si_note); 140 mtx_destroy(&efd->efd_lock); 141 free(efd, M_EVENTFD); 142 return (0); 143 } 144 145 static int 146 eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 147 int flags, struct thread *td) 148 { 149 struct eventfd *efd; 150 eventfd_t count; 151 int error; 152 153 if (uio->uio_resid < sizeof(eventfd_t)) 154 return (EINVAL); 155 156 error = 0; 157 efd = fp->f_data; 158 mtx_lock(&efd->efd_lock); 159 while (error == 0 && efd->efd_count == 0) { 160 if ((fp->f_flag & FNONBLOCK) != 0) { 161 mtx_unlock(&efd->efd_lock); 162 return (EAGAIN); 163 } 164 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, 165 "efdrd", 0); 166 } 167 if (error == 0) { 168 MPASS(efd->efd_count > 0); 169 if ((efd->efd_flags & EFD_SEMAPHORE) != 0) { 170 count = 1; 171 --efd->efd_count; 172 } else { 173 count = efd->efd_count; 174 efd->efd_count = 0; 175 } 176 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 177 selwakeup(&efd->efd_sel); 178 wakeup(&efd->efd_count); 179 mtx_unlock(&efd->efd_lock); 180 error = uiomove(&count, sizeof(eventfd_t), uio); 181 } else 182 mtx_unlock(&efd->efd_lock); 183 184 return (error); 185 } 186 187 static int 188 eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 189 int flags, struct thread *td) 190 { 191 struct eventfd *efd; 192 eventfd_t count; 193 int error; 194 195 if (uio->uio_resid < sizeof(eventfd_t)) 196 return (EINVAL); 197 198 error = uiomove(&count, sizeof(eventfd_t), uio); 199 if (error != 0) 200 return (error); 201 if (count == UINT64_MAX) 202 return (EINVAL); 203 204 efd = fp->f_data; 205 mtx_lock(&efd->efd_lock); 206 retry: 207 if (UINT64_MAX - efd->efd_count <= count) { 208 if ((fp->f_flag & FNONBLOCK) != 0) { 209 mtx_unlock(&efd->efd_lock); 210 /* Do not not return the number of bytes written */ 211 uio->uio_resid += sizeof(eventfd_t); 212 return (EAGAIN); 213 } 214 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 215 PCATCH, "efdwr", 0); 216 if (error == 0) 217 goto retry; 218 } 219 if (error == 0) { 220 MPASS(UINT64_MAX - efd->efd_count > count); 221 efd->efd_count += count; 222 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 223 selwakeup(&efd->efd_sel); 224 wakeup(&efd->efd_count); 225 } 226 mtx_unlock(&efd->efd_lock); 227 228 return (error); 229 } 230 231 static int 232 eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 233 struct thread *td) 234 { 235 struct eventfd *efd; 236 int revents; 237 238 efd = fp->f_data; 239 revents = 0; 240 mtx_lock(&efd->efd_lock); 241 if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0) 242 revents |= events & (POLLIN | POLLRDNORM); 243 if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 > 244 efd->efd_count) 245 revents |= events & (POLLOUT | POLLWRNORM); 246 if (revents == 0) 247 selrecord(td, &efd->efd_sel); 248 mtx_unlock(&efd->efd_lock); 249 250 return (revents); 251 } 252 253 static int 254 eventfd_kqfilter(struct file *fp, struct knote *kn) 255 { 256 struct eventfd *efd = fp->f_data; 257 258 mtx_lock(&efd->efd_lock); 259 switch (kn->kn_filter) { 260 case EVFILT_READ: 261 kn->kn_fop = &eventfd_rfiltops; 262 break; 263 case EVFILT_WRITE: 264 kn->kn_fop = &eventfd_wfiltops; 265 break; 266 default: 267 mtx_unlock(&efd->efd_lock); 268 return (EINVAL); 269 } 270 271 kn->kn_hook = efd; 272 knlist_add(&efd->efd_sel.si_note, kn, 1); 273 mtx_unlock(&efd->efd_lock); 274 275 return (0); 276 } 277 278 static void 279 filt_eventfddetach(struct knote *kn) 280 { 281 struct eventfd *efd = kn->kn_hook; 282 283 mtx_lock(&efd->efd_lock); 284 knlist_remove(&efd->efd_sel.si_note, kn, 1); 285 mtx_unlock(&efd->efd_lock); 286 } 287 288 static int 289 filt_eventfdread(struct knote *kn, long hint) 290 { 291 struct eventfd *efd = kn->kn_hook; 292 int ret; 293 294 mtx_assert(&efd->efd_lock, MA_OWNED); 295 kn->kn_data = (int64_t)efd->efd_count; 296 ret = efd->efd_count > 0; 297 298 return (ret); 299 } 300 301 static int 302 filt_eventfdwrite(struct knote *kn, long hint) 303 { 304 struct eventfd *efd = kn->kn_hook; 305 int ret; 306 307 mtx_assert(&efd->efd_lock, MA_OWNED); 308 kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count); 309 ret = UINT64_MAX - 1 > efd->efd_count; 310 311 return (ret); 312 } 313 314 static int 315 eventfd_ioctl(struct file *fp, u_long cmd, void *data, 316 struct ucred *active_cred, struct thread *td) 317 { 318 switch (cmd) { 319 case FIONBIO: 320 case FIOASYNC: 321 return (0); 322 } 323 324 return (ENOTTY); 325 } 326 327 static int 328 eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, 329 struct thread *td) 330 { 331 bzero((void *)st, sizeof *st); 332 st->st_mode = S_IFIFO; 333 return (0); 334 } 335 336 static int 337 eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 338 { 339 struct eventfd *efd = fp->f_data; 340 341 kif->kf_type = KF_TYPE_EVENTFD; 342 mtx_lock(&efd->efd_lock); 343 kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count; 344 kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags; 345 mtx_unlock(&efd->efd_lock); 346 return (0); 347 } 348