1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 5 * Copyright 2004 John-Mark Gurney <jmg@FreeBSD.org> 6 * Copyright (c) 2009 Apple, Inc. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include "opt_ktrace.h" 33 #include "opt_kqueue.h" 34 35 #ifdef COMPAT_FREEBSD11 36 #define _WANT_FREEBSD11_KEVENT 37 #endif 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/capsicum.h> 42 #include <sys/kernel.h> 43 #include <sys/limits.h> 44 #include <sys/lock.h> 45 #include <sys/mutex.h> 46 #include <sys/proc.h> 47 #include <sys/malloc.h> 48 #include <sys/unistd.h> 49 #include <sys/file.h> 50 #include <sys/filedesc.h> 51 #include <sys/filio.h> 52 #include <sys/fcntl.h> 53 #include <sys/jail.h> 54 #include <sys/jaildesc.h> 55 #include <sys/kthread.h> 56 #include <sys/selinfo.h> 57 #include <sys/queue.h> 58 #include <sys/event.h> 59 #include <sys/eventvar.h> 60 #include <sys/poll.h> 61 #include <sys/protosw.h> 62 #include <sys/resourcevar.h> 63 #include <sys/sbuf.h> 64 #include <sys/sigio.h> 65 #include <sys/signalvar.h> 66 #include <sys/socket.h> 67 #include <sys/socketvar.h> 68 #include <sys/stat.h> 69 #include <sys/sysctl.h> 70 #include <sys/sysent.h> 71 #include <sys/sysproto.h> 72 #include <sys/syscallsubr.h> 73 #include <sys/taskqueue.h> 74 #include <sys/uio.h> 75 #include <sys/user.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 #include <machine/atomic.h> 80 #ifdef COMPAT_FREEBSD32 81 #include <compat/freebsd32/freebsd32.h> 82 #include <compat/freebsd32/freebsd32_util.h> 83 #endif 84 85 #include <vm/uma.h> 86 87 static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); 88 89 /* 90 * This lock is used if multiple kq locks are required. This possibly 91 * should be made into a per proc lock. 
92 */ 93 static struct mtx kq_global; 94 MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF); 95 #define KQ_GLOBAL_LOCK(lck, haslck) do { \ 96 if (!haslck) \ 97 mtx_lock(lck); \ 98 haslck = 1; \ 99 } while (0) 100 #define KQ_GLOBAL_UNLOCK(lck, haslck) do { \ 101 if (haslck) \ 102 mtx_unlock(lck); \ 103 haslck = 0; \ 104 } while (0) 105 106 TASKQUEUE_DEFINE_THREAD(kqueue_ctx); 107 108 static int kevent_copyout(void *arg, struct kevent *kevp, int count); 109 static int kevent_copyin(void *arg, struct kevent *kevp, int count); 110 static int kqueue_register(struct kqueue *kq, struct kevent *kev, 111 struct thread *td, int mflag); 112 static int kqueue_acquire(struct file *fp, struct kqueue **kqp); 113 static void kqueue_release(struct kqueue *kq, int locked); 114 static void kqueue_destroy(struct kqueue *kq); 115 static void kqueue_drain(struct kqueue *kq, struct thread *td); 116 static int kqueue_expand(struct kqueue *kq, const struct filterops *fops, 117 uintptr_t ident, int mflag); 118 static void kqueue_task(void *arg, int pending); 119 static int kqueue_scan(struct kqueue *kq, int maxevents, 120 struct kevent_copyops *k_ops, 121 const struct timespec *timeout, 122 struct kevent *keva, struct thread *td); 123 static void kqueue_wakeup(struct kqueue *kq); 124 static const struct filterops *kqueue_fo_find(int filt); 125 static void kqueue_fo_release(int filt); 126 struct g_kevent_args; 127 static int kern_kevent_generic(struct thread *td, 128 struct g_kevent_args *uap, 129 struct kevent_copyops *k_ops, const char *struct_name); 130 131 static fo_ioctl_t kqueue_ioctl; 132 static fo_poll_t kqueue_poll; 133 static fo_kqfilter_t kqueue_kqfilter; 134 static fo_stat_t kqueue_stat; 135 static fo_close_t kqueue_close; 136 static fo_fill_kinfo_t kqueue_fill_kinfo; 137 static fo_fork_t kqueue_fork; 138 139 static const struct fileops kqueueops = { 140 .fo_read = invfo_rdwr, 141 .fo_write = invfo_rdwr, 142 .fo_truncate = invfo_truncate, 143 .fo_ioctl = kqueue_ioctl, 144 .fo_poll = kqueue_poll, 145 .fo_kqfilter = kqueue_kqfilter, 146 .fo_stat = kqueue_stat, 147 .fo_close = kqueue_close, 148 .fo_chmod = invfo_chmod, 149 .fo_chown = invfo_chown, 150 .fo_sendfile = invfo_sendfile, 151 .fo_cmp = file_kcmp_generic, 152 .fo_fork = kqueue_fork, 153 .fo_fill_kinfo = kqueue_fill_kinfo, 154 .fo_flags = DFLAG_FORK, 155 }; 156 157 static int knote_attach(struct knote *kn, struct kqueue *kq); 158 static void knote_drop(struct knote *kn, struct thread *td); 159 static void knote_drop_detached(struct knote *kn, struct thread *td); 160 static void knote_enqueue(struct knote *kn); 161 static void knote_dequeue(struct knote *kn); 162 static void knote_init(void *); 163 static struct knote *knote_alloc(int mflag); 164 static void knote_free(struct knote *kn); 165 166 static void filt_kqdetach(struct knote *kn); 167 static int filt_kqueue(struct knote *kn, long hint); 168 static int filt_procattach(struct knote *kn); 169 static void filt_procdetach(struct knote *kn); 170 static int filt_proc(struct knote *kn, long hint); 171 static int filt_jailattach(struct knote *kn); 172 static void filt_jaildetach(struct knote *kn); 173 static int filt_jail(struct knote *kn, long hint); 174 static int filt_fileattach(struct knote *kn); 175 static void filt_timerexpire(void *knx); 176 static void filt_timerexpire_l(struct knote *kn, bool proc_locked); 177 static int filt_timerattach(struct knote *kn); 178 static void filt_timerdetach(struct knote *kn); 179 static void filt_timerstart(struct knote *kn, sbintime_t to); 180 static 
void filt_timertouch(struct knote *kn, struct kevent *kev, 181 u_long type); 182 static int filt_timercopy(struct knote *kn, struct proc *p1); 183 static int filt_timervalidate(struct knote *kn, sbintime_t *to); 184 static int filt_timer(struct knote *kn, long hint); 185 static int filt_userattach(struct knote *kn); 186 static void filt_userdetach(struct knote *kn); 187 static int filt_user(struct knote *kn, long hint); 188 static void filt_usertouch(struct knote *kn, struct kevent *kev, 189 u_long type); 190 191 static const struct filterops file_filtops = { 192 .f_isfd = 1, 193 .f_attach = filt_fileattach, 194 .f_copy = knote_triv_copy, 195 }; 196 static const struct filterops kqread_filtops = { 197 .f_isfd = 1, 198 .f_detach = filt_kqdetach, 199 .f_event = filt_kqueue, 200 .f_copy = knote_triv_copy, 201 }; 202 /* XXX - move to kern_proc.c? */ 203 static const struct filterops proc_filtops = { 204 .f_isfd = 0, 205 .f_attach = filt_procattach, 206 .f_detach = filt_procdetach, 207 .f_event = filt_proc, 208 .f_copy = knote_triv_copy, 209 }; 210 static const struct filterops jail_filtops = { 211 .f_isfd = 0, 212 .f_attach = filt_jailattach, 213 .f_detach = filt_jaildetach, 214 .f_event = filt_jail, 215 .f_copy = knote_triv_copy, 216 }; 217 static const struct filterops timer_filtops = { 218 .f_isfd = 0, 219 .f_attach = filt_timerattach, 220 .f_detach = filt_timerdetach, 221 .f_event = filt_timer, 222 .f_touch = filt_timertouch, 223 .f_copy = filt_timercopy, 224 }; 225 static const struct filterops user_filtops = { 226 .f_attach = filt_userattach, 227 .f_detach = filt_userdetach, 228 .f_event = filt_user, 229 .f_touch = filt_usertouch, 230 .f_copy = knote_triv_copy, 231 }; 232 233 static uma_zone_t knote_zone; 234 static unsigned int __exclusive_cache_line kq_ncallouts; 235 static unsigned int kq_calloutmax = 4 * 1024; 236 SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, 237 &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); 238 239 /* XXX - ensure not influx ? 
*/ 240 #define KNOTE_ACTIVATE(kn, islock) do { \ 241 if ((islock)) \ 242 mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED); \ 243 else \ 244 KQ_LOCK((kn)->kn_kq); \ 245 (kn)->kn_status |= KN_ACTIVE; \ 246 if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \ 247 knote_enqueue((kn)); \ 248 if (!(islock)) \ 249 KQ_UNLOCK((kn)->kn_kq); \ 250 } while (0) 251 #define KQ_LOCK(kq) do { \ 252 mtx_lock(&(kq)->kq_lock); \ 253 } while (0) 254 #define KQ_FLUX_WAKEUP(kq) do { \ 255 if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) { \ 256 (kq)->kq_state &= ~KQ_FLUXWAIT; \ 257 wakeup((kq)); \ 258 } \ 259 } while (0) 260 #define KQ_UNLOCK_FLUX(kq) do { \ 261 KQ_FLUX_WAKEUP(kq); \ 262 mtx_unlock(&(kq)->kq_lock); \ 263 } while (0) 264 #define KQ_UNLOCK(kq) do { \ 265 mtx_unlock(&(kq)->kq_lock); \ 266 } while (0) 267 #define KQ_OWNED(kq) do { \ 268 mtx_assert(&(kq)->kq_lock, MA_OWNED); \ 269 } while (0) 270 #define KQ_NOTOWNED(kq) do { \ 271 mtx_assert(&(kq)->kq_lock, MA_NOTOWNED); \ 272 } while (0) 273 274 static struct knlist * 275 kn_list_lock(struct knote *kn) 276 { 277 struct knlist *knl; 278 279 knl = kn->kn_knlist; 280 if (knl != NULL) 281 knl->kl_lock(knl->kl_lockarg); 282 return (knl); 283 } 284 285 static void 286 kn_list_unlock(struct knlist *knl) 287 { 288 bool do_free; 289 290 if (knl == NULL) 291 return; 292 do_free = knl->kl_autodestroy && knlist_empty(knl); 293 knl->kl_unlock(knl->kl_lockarg); 294 if (do_free) { 295 knlist_destroy(knl); 296 free(knl, M_KQUEUE); 297 } 298 } 299 300 static bool 301 kn_in_flux(struct knote *kn) 302 { 303 304 return (kn->kn_influx > 0); 305 } 306 307 static void 308 kn_enter_flux(struct knote *kn) 309 { 310 311 KQ_OWNED(kn->kn_kq); 312 MPASS(kn->kn_influx < INT_MAX); 313 kn->kn_influx++; 314 } 315 316 static bool 317 kn_leave_flux(struct knote *kn) 318 { 319 320 KQ_OWNED(kn->kn_kq); 321 MPASS(kn->kn_influx > 0); 322 kn->kn_influx--; 323 return (kn->kn_influx == 0); 324 } 325 326 #define KNL_ASSERT_LOCK(knl, islocked) do { \ 327 if (islocked) \ 328 KNL_ASSERT_LOCKED(knl); \ 329 else \ 330 KNL_ASSERT_UNLOCKED(knl); \ 331 } while (0) 332 #ifdef INVARIANTS 333 #define KNL_ASSERT_LOCKED(knl) do { \ 334 knl->kl_assert_lock((knl)->kl_lockarg, LA_LOCKED); \ 335 } while (0) 336 #define KNL_ASSERT_UNLOCKED(knl) do { \ 337 knl->kl_assert_lock((knl)->kl_lockarg, LA_UNLOCKED); \ 338 } while (0) 339 #else /* !INVARIANTS */ 340 #define KNL_ASSERT_LOCKED(knl) do {} while (0) 341 #define KNL_ASSERT_UNLOCKED(knl) do {} while (0) 342 #endif /* INVARIANTS */ 343 344 #ifndef KN_HASHSIZE 345 #define KN_HASHSIZE 64 /* XXX should be tunable */ 346 #endif 347 348 #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) 349 350 static int 351 filt_nullattach(struct knote *kn) 352 { 353 354 return (ENXIO); 355 }; 356 357 static const struct filterops null_filtops = { 358 .f_isfd = 0, 359 .f_attach = filt_nullattach, 360 .f_copy = knote_triv_copy, 361 }; 362 363 /* XXX - make SYSINIT to add these, and move into respective modules. */ 364 extern const struct filterops sig_filtops; 365 extern const struct filterops fs_filtops; 366 367 /* 368 * Table for all system-defined filters. 
369 */ 370 static struct mtx filterops_lock; 371 MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops", MTX_DEF); 372 static struct { 373 const struct filterops *for_fop; 374 int for_nolock; 375 int for_refcnt; 376 } sysfilt_ops[EVFILT_SYSCOUNT] = { 377 [~EVFILT_READ] = { &file_filtops, 1 }, 378 [~EVFILT_WRITE] = { &file_filtops, 1 }, 379 [~EVFILT_AIO] = { &null_filtops }, 380 [~EVFILT_VNODE] = { &file_filtops, 1 }, 381 [~EVFILT_PROC] = { &proc_filtops, 1 }, 382 [~EVFILT_SIGNAL] = { &sig_filtops, 1 }, 383 [~EVFILT_TIMER] = { &timer_filtops, 1 }, 384 [~EVFILT_PROCDESC] = { &file_filtops, 1 }, 385 [~EVFILT_FS] = { &fs_filtops, 1 }, 386 [~EVFILT_LIO] = { &null_filtops }, 387 [~EVFILT_USER] = { &user_filtops, 1 }, 388 [~EVFILT_SENDFILE] = { &null_filtops }, 389 [~EVFILT_EMPTY] = { &file_filtops, 1 }, 390 [~EVFILT_JAIL] = { &jail_filtops, 1 }, 391 [~EVFILT_JAILDESC] = { &file_filtops, 1 }, 392 }; 393 394 /* 395 * Simple redirection for all cdevsw style objects to call their fo_kqfilter 396 * method. 397 */ 398 static int 399 filt_fileattach(struct knote *kn) 400 { 401 402 return (fo_kqfilter(kn->kn_fp, kn)); 403 } 404 405 /*ARGSUSED*/ 406 static int 407 kqueue_kqfilter(struct file *fp, struct knote *kn) 408 { 409 struct kqueue *kq = kn->kn_fp->f_data; 410 411 if (kn->kn_filter != EVFILT_READ) 412 return (EINVAL); 413 414 kn->kn_status |= KN_KQUEUE; 415 kn->kn_fop = &kqread_filtops; 416 knlist_add(&kq->kq_sel.si_note, kn, 0); 417 418 return (0); 419 } 420 421 static void 422 filt_kqdetach(struct knote *kn) 423 { 424 struct kqueue *kq = kn->kn_fp->f_data; 425 426 knlist_remove(&kq->kq_sel.si_note, kn, 0); 427 } 428 429 /*ARGSUSED*/ 430 static int 431 filt_kqueue(struct knote *kn, long hint) 432 { 433 struct kqueue *kq = kn->kn_fp->f_data; 434 435 kn->kn_data = kq->kq_count; 436 return (kn->kn_data > 0); 437 } 438 439 /* XXX - move to kern_proc.c? */ 440 static int 441 filt_procattach(struct knote *kn) 442 { 443 struct proc *p; 444 int error; 445 bool exiting, immediate; 446 447 exiting = immediate = false; 448 if (kn->kn_sfflags & NOTE_EXIT) 449 p = pfind_any(kn->kn_id); 450 else 451 p = pfind(kn->kn_id); 452 if (p == NULL) 453 return (ESRCH); 454 if (p->p_flag & P_WEXIT) 455 exiting = true; 456 457 if ((error = p_cansee(curthread, p))) { 458 PROC_UNLOCK(p); 459 return (error); 460 } 461 462 kn->kn_ptr.p_proc = p; 463 kn->kn_flags |= EV_CLEAR; /* automatically set */ 464 465 /* 466 * Internal flag indicating registration done by kernel for the 467 * purposes of getting a NOTE_CHILD notification. 468 */ 469 if (kn->kn_flags & EV_FLAG2) { 470 kn->kn_flags &= ~EV_FLAG2; 471 kn->kn_data = kn->kn_sdata; /* ppid */ 472 kn->kn_fflags = NOTE_CHILD; 473 kn->kn_sfflags &= ~(NOTE_EXIT | NOTE_EXEC | NOTE_FORK); 474 immediate = true; /* Force immediate activation of child note. */ 475 } 476 /* 477 * Internal flag indicating registration done by kernel (for other than 478 * NOTE_CHILD). 479 */ 480 if (kn->kn_flags & EV_FLAG1) { 481 kn->kn_flags &= ~EV_FLAG1; 482 } 483 484 knlist_add(p->p_klist, kn, 1); 485 486 /* 487 * Immediately activate any child notes or, in the case of a zombie 488 * target process, exit notes. The latter is necessary to handle the 489 * case where the target process, e.g. a child, dies before the kevent 490 * is registered. 
491 */ 492 if (immediate || (exiting && filt_proc(kn, NOTE_EXIT))) 493 KNOTE_ACTIVATE(kn, 0); 494 495 PROC_UNLOCK(p); 496 497 return (0); 498 } 499 500 /* 501 * The knote may be attached to a different process, which may exit, 502 * leaving nothing for the knote to be attached to. So when the process 503 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 504 * it will be deleted when read out. However, as part of the knote deletion, 505 * this routine is called, so a check is needed to avoid actually performing 506 * a detach, because the original process does not exist any more. 507 */ 508 /* XXX - move to kern_proc.c? */ 509 static void 510 filt_procdetach(struct knote *kn) 511 { 512 513 knlist_remove(kn->kn_knlist, kn, 0); 514 kn->kn_ptr.p_proc = NULL; 515 } 516 517 /* XXX - move to kern_proc.c? */ 518 static int 519 filt_proc(struct knote *kn, long hint) 520 { 521 struct proc *p; 522 u_int event; 523 524 p = kn->kn_ptr.p_proc; 525 if (p == NULL) /* already activated, from attach filter */ 526 return (0); 527 528 /* Mask off extra data. */ 529 event = (u_int)hint & NOTE_PCTRLMASK; 530 531 /* If the user is interested in this event, record it. */ 532 if (kn->kn_sfflags & event) 533 kn->kn_fflags |= event; 534 535 /* Process is gone, so flag the event as finished. */ 536 if (event == NOTE_EXIT) { 537 kn->kn_flags |= EV_EOF | EV_ONESHOT; 538 kn->kn_ptr.p_proc = NULL; 539 if (kn->kn_fflags & NOTE_EXIT) 540 kn->kn_data = KW_EXITCODE(p->p_xexit, p->p_xsig); 541 if (kn->kn_fflags == 0) 542 kn->kn_flags |= EV_DROP; 543 return (1); 544 } 545 546 return (kn->kn_fflags != 0); 547 } 548 549 /* 550 * Called when the process forked. It mostly does the same as the 551 * knote(), activating all knotes registered to be activated when the 552 * process forked. Additionally, for each knote attached to the 553 * parent, check whether user wants to track the new process. If so 554 * attach a new knote to it, and immediately report an event with the 555 * child's pid. 556 */ 557 void 558 knote_fork(struct knlist *list, int pid) 559 { 560 struct kqueue *kq; 561 struct knote *kn; 562 struct kevent kev; 563 int error; 564 565 MPASS(list != NULL); 566 KNL_ASSERT_LOCKED(list); 567 if (SLIST_EMPTY(&list->kl_list)) 568 return; 569 570 memset(&kev, 0, sizeof(kev)); 571 SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { 572 kq = kn->kn_kq; 573 KQ_LOCK(kq); 574 if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) { 575 KQ_UNLOCK(kq); 576 continue; 577 } 578 579 /* 580 * The same as knote(), activate the event. 581 */ 582 if ((kn->kn_sfflags & NOTE_TRACK) == 0) { 583 if (kn->kn_fop->f_event(kn, NOTE_FORK)) 584 KNOTE_ACTIVATE(kn, 1); 585 KQ_UNLOCK(kq); 586 continue; 587 } 588 589 /* 590 * The NOTE_TRACK case. In addition to the activation 591 * of the event, we need to register new events to 592 * track the child. Drop the locks in preparation for 593 * the call to kqueue_register(). 594 */ 595 kn_enter_flux(kn); 596 KQ_UNLOCK(kq); 597 list->kl_unlock(list->kl_lockarg); 598 599 /* 600 * Activate existing knote and register tracking knotes with 601 * new process. 602 * 603 * First register a knote to get just the child notice. This 604 * must be a separate note from a potential NOTE_EXIT 605 * notification since both NOTE_CHILD and NOTE_EXIT are defined 606 * to use the data field (in conflicting ways). 
607 */ 608 kev.ident = pid; 609 kev.filter = kn->kn_filter; 610 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | 611 EV_FLAG2; 612 kev.fflags = kn->kn_sfflags; 613 kev.data = kn->kn_id; /* parent */ 614 kev.udata = kn->kn_kevent.udata;/* preserve udata */ 615 error = kqueue_register(kq, &kev, NULL, M_NOWAIT); 616 if (error) 617 kn->kn_fflags |= NOTE_TRACKERR; 618 619 /* 620 * Then register another knote to track other potential events 621 * from the new process. 622 */ 623 kev.ident = pid; 624 kev.filter = kn->kn_filter; 625 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 626 kev.fflags = kn->kn_sfflags; 627 kev.data = kn->kn_id; /* parent */ 628 kev.udata = kn->kn_kevent.udata;/* preserve udata */ 629 error = kqueue_register(kq, &kev, NULL, M_NOWAIT); 630 if (error) 631 kn->kn_fflags |= NOTE_TRACKERR; 632 if (kn->kn_fop->f_event(kn, NOTE_FORK)) 633 KNOTE_ACTIVATE(kn, 0); 634 list->kl_lock(list->kl_lockarg); 635 KQ_LOCK(kq); 636 kn_leave_flux(kn); 637 KQ_UNLOCK_FLUX(kq); 638 } 639 } 640 641 int 642 filt_jailattach(struct knote *kn) 643 { 644 struct prison *pr; 645 646 if (kn->kn_id == 0) { 647 /* Let jid=0 watch the current prison (including prison0). */ 648 pr = curthread->td_ucred->cr_prison; 649 mtx_lock(&pr->pr_mtx); 650 } else { 651 sx_slock(&allprison_lock); 652 pr = prison_find_child(curthread->td_ucred->cr_prison, 653 kn->kn_id); 654 sx_sunlock(&allprison_lock); 655 if (pr == NULL) 656 return (ENOENT); 657 if (!prison_isalive(pr)) { 658 mtx_unlock(&pr->pr_mtx); 659 return (ENOENT); 660 } 661 } 662 kn->kn_ptr.p_prison = pr; 663 kn->kn_flags |= EV_CLEAR; 664 knlist_add(pr->pr_klist, kn, 1); 665 mtx_unlock(&pr->pr_mtx); 666 return (0); 667 } 668 669 void 670 filt_jaildetach(struct knote *kn) 671 { 672 if (kn->kn_ptr.p_prison != NULL) { 673 knlist_remove(kn->kn_knlist, kn, 0); 674 kn->kn_ptr.p_prison = NULL; 675 } else 676 kn->kn_status |= KN_DETACHED; 677 } 678 679 int 680 filt_jail(struct knote *kn, long hint) 681 { 682 struct prison *pr; 683 u_int event; 684 685 pr = kn->kn_ptr.p_prison; 686 if (pr == NULL) /* already activated, from attach filter */ 687 return (0); 688 689 /* 690 * Mask off extra data. In the NOTE_JAIL_CHILD case, that's 691 * everything except the NOTE_JAIL_CHILD bit itself, since a 692 * JID is any positive integer. 693 */ 694 event = ((u_int)hint & NOTE_JAIL_CHILD) ? NOTE_JAIL_CHILD : 695 (u_int)hint & NOTE_JAIL_CTRLMASK; 696 697 /* If the user is interested in this event, record it. */ 698 if (kn->kn_sfflags & event) { 699 kn->kn_fflags |= event; 700 /* Report the created jail id or attached process id. */ 701 if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) { 702 if (kn->kn_data != 0) 703 kn->kn_fflags |= NOTE_JAIL_MULTI; 704 kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U : 705 (u_int)hint & ~event; 706 } 707 } 708 709 /* Prison is gone, so flag the event as finished. */ 710 if (event == NOTE_JAIL_REMOVE) { 711 kn->kn_flags |= EV_EOF | EV_ONESHOT; 712 kn->kn_ptr.p_prison = NULL; 713 if (kn->kn_fflags == 0) 714 kn->kn_flags |= EV_DROP; 715 return (1); 716 } 717 718 return (kn->kn_fflags != 0); 719 } 720 721 /* 722 * XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the 723 * interval timer support code. 
724 */ 725 726 #define NOTE_TIMER_PRECMASK \ 727 (NOTE_SECONDS | NOTE_MSECONDS | NOTE_USECONDS | NOTE_NSECONDS) 728 729 static sbintime_t 730 timer2sbintime(int64_t data, int flags) 731 { 732 int64_t secs; 733 734 /* 735 * Macros for converting to the fractional second portion of an 736 * sbintime_t using 64bit multiplication to improve precision. 737 */ 738 #define NS_TO_SBT(ns) (((ns) * (((uint64_t)1 << 63) / 500000000)) >> 32) 739 #define US_TO_SBT(us) (((us) * (((uint64_t)1 << 63) / 500000)) >> 32) 740 #define MS_TO_SBT(ms) (((ms) * (((uint64_t)1 << 63) / 500)) >> 32) 741 switch (flags & NOTE_TIMER_PRECMASK) { 742 case NOTE_SECONDS: 743 #ifdef __LP64__ 744 if (data > (SBT_MAX / SBT_1S)) 745 return (SBT_MAX); 746 #endif 747 return ((sbintime_t)data << 32); 748 case NOTE_MSECONDS: /* FALLTHROUGH */ 749 case 0: 750 if (data >= 1000) { 751 secs = data / 1000; 752 #ifdef __LP64__ 753 if (secs > (SBT_MAX / SBT_1S)) 754 return (SBT_MAX); 755 #endif 756 return (secs << 32 | MS_TO_SBT(data % 1000)); 757 } 758 return (MS_TO_SBT(data)); 759 case NOTE_USECONDS: 760 if (data >= 1000000) { 761 secs = data / 1000000; 762 #ifdef __LP64__ 763 if (secs > (SBT_MAX / SBT_1S)) 764 return (SBT_MAX); 765 #endif 766 return (secs << 32 | US_TO_SBT(data % 1000000)); 767 } 768 return (US_TO_SBT(data)); 769 case NOTE_NSECONDS: 770 if (data >= 1000000000) { 771 secs = data / 1000000000; 772 #ifdef __LP64__ 773 if (secs > (SBT_MAX / SBT_1S)) 774 return (SBT_MAX); 775 #endif 776 return (secs << 32 | NS_TO_SBT(data % 1000000000)); 777 } 778 return (NS_TO_SBT(data)); 779 default: 780 break; 781 } 782 return (-1); 783 } 784 785 struct kq_timer_cb_data { 786 struct callout c; 787 struct proc *p; 788 struct knote *kn; 789 int cpuid; 790 int flags; 791 TAILQ_ENTRY(kq_timer_cb_data) link; 792 sbintime_t next; /* next timer event fires at */ 793 sbintime_t to; /* precalculated timer period, 0 for abs */ 794 }; 795 796 #define KQ_TIMER_CB_ENQUEUED 0x01 797 798 static void 799 kqtimer_sched_callout(struct kq_timer_cb_data *kc) 800 { 801 callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kc->kn, 802 kc->cpuid, C_ABSOLUTE); 803 } 804 805 void 806 kqtimer_proc_continue(struct proc *p) 807 { 808 struct kq_timer_cb_data *kc, *kc1; 809 struct bintime bt; 810 sbintime_t now; 811 812 PROC_LOCK_ASSERT(p, MA_OWNED); 813 814 getboottimebin(&bt); 815 now = bttosbt(bt); 816 817 TAILQ_FOREACH_SAFE(kc, &p->p_kqtim_stop, link, kc1) { 818 TAILQ_REMOVE(&p->p_kqtim_stop, kc, link); 819 kc->flags &= ~KQ_TIMER_CB_ENQUEUED; 820 if (kc->next <= now) 821 filt_timerexpire_l(kc->kn, true); 822 else 823 kqtimer_sched_callout(kc); 824 } 825 } 826 827 static void 828 filt_timerexpire_l(struct knote *kn, bool proc_locked) 829 { 830 struct kq_timer_cb_data *kc; 831 struct proc *p; 832 uint64_t delta; 833 sbintime_t now; 834 835 kc = kn->kn_ptr.p_v; 836 837 if ((kn->kn_flags & EV_ONESHOT) != 0 || kc->to == 0) { 838 kn->kn_data++; 839 KNOTE_ACTIVATE(kn, 0); 840 return; 841 } 842 843 now = sbinuptime(); 844 if (now >= kc->next) { 845 delta = (now - kc->next) / kc->to; 846 if (delta == 0) 847 delta = 1; 848 kn->kn_data += delta; 849 kc->next += delta * kc->to; 850 if (now >= kc->next) /* overflow */ 851 kc->next = now + kc->to; 852 KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ 853 } 854 855 /* 856 * Initial check for stopped kc->p is racy. It is fine to 857 * miss the set of the stop flags, at worst we would schedule 858 * one more callout. 
On the other hand, it is not fine to not 859 * schedule when we we missed clearing of the flags, we 860 * recheck them under the lock and observe consistent state. 861 */ 862 p = kc->p; 863 if (P_SHOULDSTOP(p) || P_KILLED(p)) { 864 if (!proc_locked) 865 PROC_LOCK(p); 866 if (P_SHOULDSTOP(p) || P_KILLED(p)) { 867 if ((kc->flags & KQ_TIMER_CB_ENQUEUED) == 0) { 868 kc->flags |= KQ_TIMER_CB_ENQUEUED; 869 TAILQ_INSERT_TAIL(&p->p_kqtim_stop, kc, link); 870 } 871 if (!proc_locked) 872 PROC_UNLOCK(p); 873 return; 874 } 875 if (!proc_locked) 876 PROC_UNLOCK(p); 877 } 878 kqtimer_sched_callout(kc); 879 } 880 881 static void 882 filt_timerexpire(void *knx) 883 { 884 filt_timerexpire_l(knx, false); 885 } 886 887 /* 888 * data contains amount of time to sleep 889 */ 890 static int 891 filt_timervalidate(struct knote *kn, sbintime_t *to) 892 { 893 struct bintime bt; 894 sbintime_t sbt; 895 896 if (kn->kn_sdata < 0) 897 return (EINVAL); 898 if (kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0) 899 kn->kn_sdata = 1; 900 /* 901 * The only fflags values supported are the timer unit 902 * (precision) and the absolute time indicator. 903 */ 904 if ((kn->kn_sfflags & ~(NOTE_TIMER_PRECMASK | NOTE_ABSTIME)) != 0) 905 return (EINVAL); 906 907 *to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags); 908 if (*to < 0) 909 return (EINVAL); 910 if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) { 911 getboottimebin(&bt); 912 sbt = bttosbt(bt); 913 *to = MAX(0, *to - sbt); 914 } 915 return (0); 916 } 917 918 static int 919 filt_timerattach(struct knote *kn) 920 { 921 struct kq_timer_cb_data *kc; 922 sbintime_t to; 923 int error; 924 925 to = -1; 926 error = filt_timervalidate(kn, &to); 927 if (error != 0) 928 return (error); 929 KASSERT(to > 0 || (kn->kn_flags & EV_ONESHOT) != 0 || 930 (kn->kn_sfflags & NOTE_ABSTIME) != 0, 931 ("%s: periodic timer has a calculated zero timeout", __func__)); 932 KASSERT(to >= 0, 933 ("%s: timer has a calculated negative timeout", __func__)); 934 935 if (atomic_fetchadd_int(&kq_ncallouts, 1) + 1 > kq_calloutmax) { 936 atomic_subtract_int(&kq_ncallouts, 1); 937 return (ENOMEM); 938 } 939 940 if ((kn->kn_sfflags & NOTE_ABSTIME) == 0) 941 kn->kn_flags |= EV_CLEAR; /* automatically set */ 942 kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */ 943 kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); 944 kc->kn = kn; 945 kc->p = curproc; 946 kc->cpuid = PCPU_GET(cpuid); 947 kc->flags = 0; 948 callout_init(&kc->c, 1); 949 filt_timerstart(kn, to); 950 951 return (0); 952 } 953 954 static int 955 filt_timercopy(struct knote *kn, struct proc *p) 956 { 957 struct kq_timer_cb_data *kc_src, *kc; 958 959 if (atomic_fetchadd_int(&kq_ncallouts, 1) + 1 > kq_calloutmax) { 960 atomic_subtract_int(&kq_ncallouts, 1); 961 return (ENOMEM); 962 } 963 964 kn->kn_status &= ~KN_DETACHED; 965 kc_src = kn->kn_ptr.p_v; 966 kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); 967 kc->kn = kn; 968 kc->p = p; 969 kc->flags = kc_src->flags & ~KQ_TIMER_CB_ENQUEUED; 970 kc->next = kc_src->next; 971 kc->to = kc_src->to; 972 kc->cpuid = PCPU_GET(cpuid); 973 callout_init(&kc->c, 1); 974 kqtimer_sched_callout(kc); 975 return (0); 976 } 977 978 static void 979 filt_timerstart(struct knote *kn, sbintime_t to) 980 { 981 struct kq_timer_cb_data *kc; 982 983 kc = kn->kn_ptr.p_v; 984 if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) { 985 kc->next = to; 986 kc->to = 0; 987 } else { 988 kc->next = to + sbinuptime(); 989 kc->to = to; 990 } 991 kqtimer_sched_callout(kc); 992 } 993 994 static void 995 filt_timerdetach(struct 
knote *kn) 996 { 997 struct kq_timer_cb_data *kc; 998 unsigned int old __unused; 999 bool pending; 1000 1001 kc = kn->kn_ptr.p_v; 1002 do { 1003 callout_drain(&kc->c); 1004 1005 /* 1006 * kqtimer_proc_continue() might have rescheduled this callout. 1007 * Double-check, using the process mutex as an interlock. 1008 */ 1009 PROC_LOCK(kc->p); 1010 if ((kc->flags & KQ_TIMER_CB_ENQUEUED) != 0) { 1011 kc->flags &= ~KQ_TIMER_CB_ENQUEUED; 1012 TAILQ_REMOVE(&kc->p->p_kqtim_stop, kc, link); 1013 } 1014 pending = callout_pending(&kc->c); 1015 PROC_UNLOCK(kc->p); 1016 } while (pending); 1017 free(kc, M_KQUEUE); 1018 old = atomic_fetchadd_int(&kq_ncallouts, -1); 1019 KASSERT(old > 0, ("Number of callouts cannot become negative")); 1020 kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */ 1021 } 1022 1023 static void 1024 filt_timertouch(struct knote *kn, struct kevent *kev, u_long type) 1025 { 1026 struct kq_timer_cb_data *kc; 1027 struct kqueue *kq; 1028 sbintime_t to; 1029 int error; 1030 1031 switch (type) { 1032 case EVENT_REGISTER: 1033 /* Handle re-added timers that update data/fflags */ 1034 if (kev->flags & EV_ADD) { 1035 kc = kn->kn_ptr.p_v; 1036 1037 /* Drain any existing callout. */ 1038 callout_drain(&kc->c); 1039 1040 /* Throw away any existing undelivered record 1041 * of the timer expiration. This is done under 1042 * the presumption that if a process is 1043 * re-adding this timer with new parameters, 1044 * it is no longer interested in what may have 1045 * happened under the old parameters. If it is 1046 * interested, it can wait for the expiration, 1047 * delete the old timer definition, and then 1048 * add the new one. 1049 * 1050 * This has to be done while the kq is locked: 1051 * - if enqueued, dequeue 1052 * - make it no longer active 1053 * - clear the count of expiration events 1054 */ 1055 kq = kn->kn_kq; 1056 KQ_LOCK(kq); 1057 if (kn->kn_status & KN_QUEUED) 1058 knote_dequeue(kn); 1059 1060 kn->kn_status &= ~KN_ACTIVE; 1061 kn->kn_data = 0; 1062 KQ_UNLOCK(kq); 1063 1064 /* Reschedule timer based on new data/fflags */ 1065 kn->kn_sfflags = kev->fflags; 1066 kn->kn_sdata = kev->data; 1067 error = filt_timervalidate(kn, &to); 1068 if (error != 0) { 1069 kn->kn_flags |= EV_ERROR; 1070 kn->kn_data = error; 1071 } else 1072 filt_timerstart(kn, to); 1073 } 1074 break; 1075 1076 case EVENT_PROCESS: 1077 *kev = kn->kn_kevent; 1078 if (kn->kn_flags & EV_CLEAR) { 1079 kn->kn_data = 0; 1080 kn->kn_fflags = 0; 1081 } 1082 break; 1083 1084 default: 1085 panic("filt_timertouch() - invalid type (%ld)", type); 1086 break; 1087 } 1088 } 1089 1090 static int 1091 filt_timer(struct knote *kn, long hint) 1092 { 1093 1094 return (kn->kn_data != 0); 1095 } 1096 1097 static int 1098 filt_userattach(struct knote *kn) 1099 { 1100 1101 /* 1102 * EVFILT_USER knotes are not attached to anything in the kernel. 1103 */ 1104 kn->kn_hook = NULL; 1105 if (kn->kn_fflags & NOTE_TRIGGER) 1106 kn->kn_hookid = 1; 1107 else 1108 kn->kn_hookid = 0; 1109 return (0); 1110 } 1111 1112 static void 1113 filt_userdetach(__unused struct knote *kn) 1114 { 1115 1116 /* 1117 * EVFILT_USER knotes are not attached to anything in the kernel. 
1118 */ 1119 } 1120 1121 static int 1122 filt_user(struct knote *kn, __unused long hint) 1123 { 1124 1125 return (kn->kn_hookid); 1126 } 1127 1128 static void 1129 filt_usertouch(struct knote *kn, struct kevent *kev, u_long type) 1130 { 1131 u_int ffctrl; 1132 1133 switch (type) { 1134 case EVENT_REGISTER: 1135 if (kev->fflags & NOTE_TRIGGER) 1136 kn->kn_hookid = 1; 1137 1138 ffctrl = kev->fflags & NOTE_FFCTRLMASK; 1139 kev->fflags &= NOTE_FFLAGSMASK; 1140 switch (ffctrl) { 1141 case NOTE_FFNOP: 1142 break; 1143 1144 case NOTE_FFAND: 1145 kn->kn_sfflags &= kev->fflags; 1146 break; 1147 1148 case NOTE_FFOR: 1149 kn->kn_sfflags |= kev->fflags; 1150 break; 1151 1152 case NOTE_FFCOPY: 1153 kn->kn_sfflags = kev->fflags; 1154 break; 1155 1156 default: 1157 /* XXX Return error? */ 1158 break; 1159 } 1160 kn->kn_sdata = kev->data; 1161 if (kev->flags & EV_CLEAR) { 1162 kn->kn_hookid = 0; 1163 kn->kn_data = 0; 1164 kn->kn_fflags = 0; 1165 } 1166 break; 1167 1168 case EVENT_PROCESS: 1169 *kev = kn->kn_kevent; 1170 kev->fflags = kn->kn_sfflags; 1171 kev->data = kn->kn_sdata; 1172 if (kn->kn_flags & EV_CLEAR) { 1173 kn->kn_hookid = 0; 1174 kn->kn_data = 0; 1175 kn->kn_fflags = 0; 1176 } 1177 break; 1178 1179 default: 1180 panic("filt_usertouch() - invalid type (%ld)", type); 1181 break; 1182 } 1183 } 1184 1185 int 1186 sys_kqueue(struct thread *td, struct kqueue_args *uap) 1187 { 1188 1189 return (kern_kqueue(td, 0, false, NULL)); 1190 } 1191 1192 int 1193 sys_kqueuex(struct thread *td, struct kqueuex_args *uap) 1194 { 1195 int flags; 1196 1197 if ((uap->flags & ~(KQUEUE_CLOEXEC | KQUEUE_CPONFORK)) != 0) 1198 return (EINVAL); 1199 flags = 0; 1200 if ((uap->flags & KQUEUE_CLOEXEC) != 0) 1201 flags |= O_CLOEXEC; 1202 return (kern_kqueue(td, flags, (uap->flags & KQUEUE_CPONFORK) != 0, 1203 NULL)); 1204 } 1205 1206 static void 1207 kqueue_init(struct kqueue *kq, bool cponfork) 1208 { 1209 1210 mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK); 1211 TAILQ_INIT(&kq->kq_head); 1212 knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); 1213 TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); 1214 if (cponfork) 1215 kq->kq_state |= KQ_CPONFORK; 1216 } 1217 1218 static int 1219 kern_kqueue_alloc(struct thread *td, struct filedesc *fdp, int *fdip, 1220 struct file **fpp, int flags, struct filecaps *fcaps, bool cponfork, 1221 struct kqueue **kqp) 1222 { 1223 struct ucred *cred; 1224 struct kqueue *kq; 1225 int error; 1226 1227 cred = td->td_ucred; 1228 if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES))) 1229 return (ENOMEM); 1230 1231 error = fdip != NULL ? falloc_caps(td, fpp, fdip, flags, fcaps) : 1232 _falloc_noinstall(td, fpp, 1); 1233 if (error != 0) { 1234 chgkqcnt(cred->cr_ruidinfo, -1, 0); 1235 return (error); 1236 } 1237 1238 /* An extra reference on `fp' has been held for us by falloc(). 
*/ 1239 kq = malloc(sizeof(*kq), M_KQUEUE, M_WAITOK | M_ZERO); 1240 kqueue_init(kq, cponfork); 1241 kq->kq_fdp = fdp; 1242 kq->kq_cred = crhold(cred); 1243 1244 if (fdip != NULL) 1245 FILEDESC_XLOCK(fdp); 1246 TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); 1247 if (fdip != NULL) 1248 FILEDESC_XUNLOCK(fdp); 1249 1250 finit(*fpp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); 1251 *kqp = kq; 1252 return (0); 1253 } 1254 1255 int 1256 kern_kqueue(struct thread *td, int flags, bool cponfork, struct filecaps *fcaps) 1257 { 1258 struct kqueue *kq; 1259 struct file *fp; 1260 int fd, error; 1261 1262 error = kern_kqueue_alloc(td, td->td_proc->p_fd, &fd, &fp, flags, 1263 fcaps, cponfork, &kq); 1264 if (error != 0) 1265 return (error); 1266 1267 fdrop(fp, td); 1268 1269 td->td_retval[0] = fd; 1270 return (0); 1271 } 1272 1273 struct g_kevent_args { 1274 int fd; 1275 const void *changelist; 1276 int nchanges; 1277 void *eventlist; 1278 int nevents; 1279 const struct timespec *timeout; 1280 }; 1281 1282 int 1283 sys_kevent(struct thread *td, struct kevent_args *uap) 1284 { 1285 struct kevent_copyops k_ops = { 1286 .arg = uap, 1287 .k_copyout = kevent_copyout, 1288 .k_copyin = kevent_copyin, 1289 .kevent_size = sizeof(struct kevent), 1290 }; 1291 struct g_kevent_args gk_args = { 1292 .fd = uap->fd, 1293 .changelist = uap->changelist, 1294 .nchanges = uap->nchanges, 1295 .eventlist = uap->eventlist, 1296 .nevents = uap->nevents, 1297 .timeout = uap->timeout, 1298 }; 1299 1300 return (kern_kevent_generic(td, &gk_args, &k_ops, "kevent")); 1301 } 1302 1303 static int 1304 kern_kevent_generic(struct thread *td, struct g_kevent_args *uap, 1305 struct kevent_copyops *k_ops, const char *struct_name) 1306 { 1307 struct timespec ts, *tsp; 1308 #ifdef KTRACE 1309 struct kevent *eventlist = uap->eventlist; 1310 #endif 1311 int error; 1312 1313 if (uap->timeout != NULL) { 1314 error = copyin(uap->timeout, &ts, sizeof(ts)); 1315 if (error) 1316 return (error); 1317 tsp = &ts; 1318 } else 1319 tsp = NULL; 1320 1321 #ifdef KTRACE 1322 if (KTRPOINT(td, KTR_STRUCT_ARRAY)) 1323 ktrstructarray(struct_name, UIO_USERSPACE, uap->changelist, 1324 uap->nchanges, k_ops->kevent_size); 1325 #endif 1326 1327 error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, 1328 k_ops, tsp); 1329 1330 #ifdef KTRACE 1331 if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY)) 1332 ktrstructarray(struct_name, UIO_USERSPACE, eventlist, 1333 td->td_retval[0], k_ops->kevent_size); 1334 #endif 1335 1336 return (error); 1337 } 1338 1339 /* 1340 * Copy 'count' items into the destination list pointed to by uap->eventlist. 1341 */ 1342 static int 1343 kevent_copyout(void *arg, struct kevent *kevp, int count) 1344 { 1345 struct kevent_args *uap; 1346 int error; 1347 1348 KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); 1349 uap = (struct kevent_args *)arg; 1350 1351 error = copyout(kevp, uap->eventlist, count * sizeof *kevp); 1352 if (error == 0) 1353 uap->eventlist += count; 1354 return (error); 1355 } 1356 1357 /* 1358 * Copy 'count' items from the list pointed to by uap->changelist. 
1359 */ 1360 static int 1361 kevent_copyin(void *arg, struct kevent *kevp, int count) 1362 { 1363 struct kevent_args *uap; 1364 int error; 1365 1366 KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); 1367 uap = (struct kevent_args *)arg; 1368 1369 error = copyin(uap->changelist, kevp, count * sizeof *kevp); 1370 if (error == 0) 1371 uap->changelist += count; 1372 return (error); 1373 } 1374 1375 #ifdef COMPAT_FREEBSD11 1376 static int 1377 kevent11_copyout(void *arg, struct kevent *kevp, int count) 1378 { 1379 struct freebsd11_kevent_args *uap; 1380 struct freebsd11_kevent kev11; 1381 int error, i; 1382 1383 KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); 1384 uap = (struct freebsd11_kevent_args *)arg; 1385 1386 for (i = 0; i < count; i++) { 1387 kev11.ident = kevp->ident; 1388 kev11.filter = kevp->filter; 1389 kev11.flags = kevp->flags; 1390 kev11.fflags = kevp->fflags; 1391 kev11.data = kevp->data; 1392 kev11.udata = kevp->udata; 1393 error = copyout(&kev11, uap->eventlist, sizeof(kev11)); 1394 if (error != 0) 1395 break; 1396 uap->eventlist++; 1397 kevp++; 1398 } 1399 return (error); 1400 } 1401 1402 /* 1403 * Copy 'count' items from the list pointed to by uap->changelist. 1404 */ 1405 static int 1406 kevent11_copyin(void *arg, struct kevent *kevp, int count) 1407 { 1408 struct freebsd11_kevent_args *uap; 1409 struct freebsd11_kevent kev11; 1410 int error, i; 1411 1412 KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); 1413 uap = (struct freebsd11_kevent_args *)arg; 1414 1415 for (i = 0; i < count; i++) { 1416 error = copyin(uap->changelist, &kev11, sizeof(kev11)); 1417 if (error != 0) 1418 break; 1419 kevp->ident = kev11.ident; 1420 kevp->filter = kev11.filter; 1421 kevp->flags = kev11.flags; 1422 kevp->fflags = kev11.fflags; 1423 kevp->data = (uintptr_t)kev11.data; 1424 kevp->udata = kev11.udata; 1425 bzero(&kevp->ext, sizeof(kevp->ext)); 1426 uap->changelist++; 1427 kevp++; 1428 } 1429 return (error); 1430 } 1431 1432 int 1433 freebsd11_kevent(struct thread *td, struct freebsd11_kevent_args *uap) 1434 { 1435 struct kevent_copyops k_ops = { 1436 .arg = uap, 1437 .k_copyout = kevent11_copyout, 1438 .k_copyin = kevent11_copyin, 1439 .kevent_size = sizeof(struct freebsd11_kevent), 1440 }; 1441 struct g_kevent_args gk_args = { 1442 .fd = uap->fd, 1443 .changelist = uap->changelist, 1444 .nchanges = uap->nchanges, 1445 .eventlist = uap->eventlist, 1446 .nevents = uap->nevents, 1447 .timeout = uap->timeout, 1448 }; 1449 1450 return (kern_kevent_generic(td, &gk_args, &k_ops, "freebsd11_kevent")); 1451 } 1452 #endif 1453 1454 int 1455 kern_kevent(struct thread *td, int fd, int nchanges, int nevents, 1456 struct kevent_copyops *k_ops, const struct timespec *timeout) 1457 { 1458 cap_rights_t rights; 1459 struct file *fp; 1460 int error; 1461 1462 cap_rights_init_zero(&rights); 1463 if (nchanges > 0) 1464 cap_rights_set_one(&rights, CAP_KQUEUE_CHANGE); 1465 if (nevents > 0) 1466 cap_rights_set_one(&rights, CAP_KQUEUE_EVENT); 1467 error = fget(td, fd, &rights, &fp); 1468 if (error != 0) 1469 return (error); 1470 1471 error = kern_kevent_fp(td, fp, nchanges, nevents, k_ops, timeout); 1472 fdrop(fp, td); 1473 1474 return (error); 1475 } 1476 1477 static int 1478 kqueue_kevent(struct kqueue *kq, struct thread *td, int nchanges, int nevents, 1479 struct kevent_copyops *k_ops, const struct timespec *timeout) 1480 { 1481 struct kevent keva[KQ_NEVENTS]; 1482 struct kevent *kevp, *changes; 1483 int i, n, nerrors, error; 1484 1485 if (nchanges < 0) 1486 
return (EINVAL); 1487 1488 nerrors = 0; 1489 while (nchanges > 0) { 1490 n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges; 1491 error = k_ops->k_copyin(k_ops->arg, keva, n); 1492 if (error) 1493 return (error); 1494 changes = keva; 1495 for (i = 0; i < n; i++) { 1496 kevp = &changes[i]; 1497 if (!kevp->filter) 1498 continue; 1499 kevp->flags &= ~EV_SYSFLAGS; 1500 error = kqueue_register(kq, kevp, td, M_WAITOK); 1501 if (error || (kevp->flags & EV_RECEIPT)) { 1502 if (nevents == 0) 1503 return (error); 1504 kevp->flags = EV_ERROR; 1505 kevp->data = error; 1506 (void)k_ops->k_copyout(k_ops->arg, kevp, 1); 1507 nevents--; 1508 nerrors++; 1509 } 1510 } 1511 nchanges -= n; 1512 } 1513 if (nerrors) { 1514 td->td_retval[0] = nerrors; 1515 return (0); 1516 } 1517 1518 return (kqueue_scan(kq, nevents, k_ops, timeout, keva, td)); 1519 } 1520 1521 int 1522 kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, 1523 struct kevent_copyops *k_ops, const struct timespec *timeout) 1524 { 1525 struct kqueue *kq; 1526 int error; 1527 1528 error = kqueue_acquire(fp, &kq); 1529 if (error != 0) 1530 return (error); 1531 error = kqueue_kevent(kq, td, nchanges, nevents, k_ops, timeout); 1532 kqueue_release(kq, 0); 1533 return (error); 1534 } 1535 1536 /* 1537 * Performs a kevent() call on a temporarily created kqueue. This can be 1538 * used to perform one-shot polling, similar to poll() and select(). 1539 */ 1540 int 1541 kern_kevent_anonymous(struct thread *td, int nevents, 1542 struct kevent_copyops *k_ops) 1543 { 1544 struct kqueue kq = {}; 1545 int error; 1546 1547 kqueue_init(&kq, false); 1548 kq.kq_refcnt = 1; 1549 error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL); 1550 kqueue_drain(&kq, td); 1551 kqueue_destroy(&kq); 1552 return (error); 1553 } 1554 1555 int 1556 kqueue_add_filteropts(int filt, const struct filterops *filtops) 1557 { 1558 int error; 1559 1560 error = 0; 1561 if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) { 1562 printf( 1563 "trying to add a filterop that is out of range: %d is beyond %d\n", 1564 ~filt, EVFILT_SYSCOUNT); 1565 return EINVAL; 1566 } 1567 mtx_lock(&filterops_lock); 1568 if (sysfilt_ops[~filt].for_fop != &null_filtops && 1569 sysfilt_ops[~filt].for_fop != NULL) 1570 error = EEXIST; 1571 else { 1572 sysfilt_ops[~filt].for_fop = filtops; 1573 sysfilt_ops[~filt].for_refcnt = 0; 1574 } 1575 mtx_unlock(&filterops_lock); 1576 1577 return (error); 1578 } 1579 1580 int 1581 kqueue_del_filteropts(int filt) 1582 { 1583 int error; 1584 1585 error = 0; 1586 if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) 1587 return EINVAL; 1588 1589 mtx_lock(&filterops_lock); 1590 if (sysfilt_ops[~filt].for_fop == &null_filtops || 1591 sysfilt_ops[~filt].for_fop == NULL) 1592 error = EINVAL; 1593 else if (sysfilt_ops[~filt].for_refcnt != 0) 1594 error = EBUSY; 1595 else { 1596 sysfilt_ops[~filt].for_fop = &null_filtops; 1597 sysfilt_ops[~filt].for_refcnt = 0; 1598 } 1599 mtx_unlock(&filterops_lock); 1600 1601 return error; 1602 } 1603 1604 static const struct filterops * 1605 kqueue_fo_find(int filt) 1606 { 1607 1608 if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) 1609 return NULL; 1610 1611 if (sysfilt_ops[~filt].for_nolock) 1612 return sysfilt_ops[~filt].for_fop; 1613 1614 mtx_lock(&filterops_lock); 1615 sysfilt_ops[~filt].for_refcnt++; 1616 if (sysfilt_ops[~filt].for_fop == NULL) 1617 sysfilt_ops[~filt].for_fop = &null_filtops; 1618 mtx_unlock(&filterops_lock); 1619 1620 return sysfilt_ops[~filt].for_fop; 1621 } 1622 1623 static void 1624 kqueue_fo_release(int filt) 1625 { 
1626 1627 if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) 1628 return; 1629 1630 if (sysfilt_ops[~filt].for_nolock) 1631 return; 1632 1633 mtx_lock(&filterops_lock); 1634 KASSERT(sysfilt_ops[~filt].for_refcnt > 0, 1635 ("filter object %d refcount not valid on release", filt)); 1636 sysfilt_ops[~filt].for_refcnt--; 1637 mtx_unlock(&filterops_lock); 1638 } 1639 1640 /* 1641 * A ref to kq (obtained via kqueue_acquire) must be held. 1642 */ 1643 static int 1644 kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, 1645 int mflag) 1646 { 1647 const struct filterops *fops; 1648 struct file *fp; 1649 struct knote *kn, *tkn; 1650 struct knlist *knl; 1651 int error, filt, event; 1652 int haskqglobal, filedesc_unlock; 1653 1654 if ((kev->flags & (EV_ENABLE | EV_DISABLE)) == (EV_ENABLE | EV_DISABLE)) 1655 return (EINVAL); 1656 1657 fp = NULL; 1658 kn = NULL; 1659 knl = NULL; 1660 error = 0; 1661 haskqglobal = 0; 1662 filedesc_unlock = 0; 1663 1664 filt = kev->filter; 1665 fops = kqueue_fo_find(filt); 1666 if (fops == NULL) 1667 return EINVAL; 1668 1669 if (kev->flags & EV_ADD) { 1670 /* Reject an invalid flag pair early */ 1671 if (kev->flags & EV_KEEPUDATA) { 1672 tkn = NULL; 1673 error = EINVAL; 1674 goto done; 1675 } 1676 1677 /* 1678 * Prevent waiting with locks. Non-sleepable 1679 * allocation failures are handled in the loop, only 1680 * if the spare knote appears to be actually required. 1681 */ 1682 tkn = knote_alloc(mflag); 1683 } else { 1684 tkn = NULL; 1685 } 1686 1687 findkn: 1688 if (fops->f_isfd) { 1689 KASSERT(td != NULL, ("td is NULL")); 1690 if (kev->ident > INT_MAX) 1691 error = EBADF; 1692 else 1693 error = fget(td, kev->ident, &cap_event_rights, &fp); 1694 if (error) 1695 goto done; 1696 1697 if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops, 1698 kev->ident, M_NOWAIT) != 0) { 1699 /* try again */ 1700 fdrop(fp, td); 1701 fp = NULL; 1702 error = kqueue_expand(kq, fops, kev->ident, mflag); 1703 if (error) 1704 goto done; 1705 goto findkn; 1706 } 1707 1708 if (fp->f_type == DTYPE_KQUEUE) { 1709 /* 1710 * If we add some intelligence about what we are doing, 1711 * we should be able to support events on ourselves. 1712 * We need to know when we are doing this to prevent 1713 * getting both the knlist lock and the kq lock since 1714 * they are the same thing. 1715 */ 1716 if (fp->f_data == kq) { 1717 error = EINVAL; 1718 goto done; 1719 } 1720 1721 /* 1722 * Pre-lock the filedesc before the global 1723 * lock mutex, see the comment in 1724 * kqueue_close(). 1725 */ 1726 FILEDESC_XLOCK(td->td_proc->p_fd); 1727 filedesc_unlock = 1; 1728 KQ_GLOBAL_LOCK(&kq_global, haskqglobal); 1729 } 1730 1731 KQ_LOCK(kq); 1732 if (kev->ident < kq->kq_knlistsize) { 1733 SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link) 1734 if (kev->filter == kn->kn_filter) 1735 break; 1736 } 1737 } else { 1738 if ((kev->flags & EV_ADD) == EV_ADD) { 1739 error = kqueue_expand(kq, fops, kev->ident, mflag); 1740 if (error != 0) 1741 goto done; 1742 } 1743 1744 KQ_LOCK(kq); 1745 1746 /* 1747 * If possible, find an existing knote to use for this kevent. 1748 */ 1749 if (kev->filter == EVFILT_PROC && 1750 (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) { 1751 /* This is an internal creation of a process tracking 1752 * note. Don't attempt to coalesce this with an 1753 * existing note. 
1754 */ 1755 ; 1756 } else if (kq->kq_knhashmask != 0) { 1757 struct klist *list; 1758 1759 list = &kq->kq_knhash[ 1760 KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; 1761 SLIST_FOREACH(kn, list, kn_link) 1762 if (kev->ident == kn->kn_id && 1763 kev->filter == kn->kn_filter) 1764 break; 1765 } 1766 } 1767 1768 /* knote is in the process of changing, wait for it to stabilize. */ 1769 if (kn != NULL && kn_in_flux(kn)) { 1770 KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); 1771 if (filedesc_unlock) { 1772 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1773 filedesc_unlock = 0; 1774 } 1775 kq->kq_state |= KQ_FLUXWAIT; 1776 msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0); 1777 if (fp != NULL) { 1778 fdrop(fp, td); 1779 fp = NULL; 1780 } 1781 goto findkn; 1782 } 1783 1784 /* 1785 * kn now contains the matching knote, or NULL if no match 1786 */ 1787 if (kn == NULL) { 1788 if (kev->flags & EV_ADD) { 1789 kn = tkn; 1790 tkn = NULL; 1791 if (kn == NULL) { 1792 KQ_UNLOCK(kq); 1793 error = ENOMEM; 1794 goto done; 1795 } 1796 kn->kn_fp = fp; 1797 kn->kn_kq = kq; 1798 kn->kn_fop = fops; 1799 /* 1800 * apply reference counts to knote structure, and 1801 * do not release it at the end of this routine. 1802 */ 1803 fops = NULL; 1804 fp = NULL; 1805 1806 kn->kn_sfflags = kev->fflags; 1807 kn->kn_sdata = kev->data; 1808 kev->fflags = 0; 1809 kev->data = 0; 1810 kn->kn_kevent = *kev; 1811 kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE | 1812 EV_ENABLE | EV_DISABLE | EV_FORCEONESHOT); 1813 kn->kn_status = KN_DETACHED; 1814 if ((kev->flags & EV_DISABLE) != 0) 1815 kn->kn_status |= KN_DISABLED; 1816 kn_enter_flux(kn); 1817 1818 error = knote_attach(kn, kq); 1819 KQ_UNLOCK(kq); 1820 if (error != 0) { 1821 tkn = kn; 1822 goto done; 1823 } 1824 1825 if ((error = kn->kn_fop->f_attach(kn)) != 0) { 1826 knote_drop_detached(kn, td); 1827 goto done; 1828 } 1829 knl = kn_list_lock(kn); 1830 goto done_ev_add; 1831 } else { 1832 /* No matching knote and the EV_ADD flag is not set. */ 1833 KQ_UNLOCK(kq); 1834 error = ENOENT; 1835 goto done; 1836 } 1837 } 1838 1839 if (kev->flags & EV_DELETE) { 1840 kn_enter_flux(kn); 1841 KQ_UNLOCK(kq); 1842 knote_drop(kn, td); 1843 goto done; 1844 } 1845 1846 if (kev->flags & EV_FORCEONESHOT) { 1847 kn->kn_flags |= EV_ONESHOT; 1848 KNOTE_ACTIVATE(kn, 1); 1849 } 1850 1851 if ((kev->flags & EV_ENABLE) != 0) 1852 kn->kn_status &= ~KN_DISABLED; 1853 else if ((kev->flags & EV_DISABLE) != 0) 1854 kn->kn_status |= KN_DISABLED; 1855 1856 /* 1857 * The user may change some filter values after the initial EV_ADD, 1858 * but doing so will not reset any filter which has already been 1859 * triggered. 1860 */ 1861 kn->kn_status |= KN_SCAN; 1862 kn_enter_flux(kn); 1863 KQ_UNLOCK(kq); 1864 knl = kn_list_lock(kn); 1865 if ((kev->flags & EV_KEEPUDATA) == 0) 1866 kn->kn_kevent.udata = kev->udata; 1867 if (!fops->f_isfd && fops->f_touch != NULL) { 1868 fops->f_touch(kn, kev, EVENT_REGISTER); 1869 } else { 1870 kn->kn_sfflags = kev->fflags; 1871 kn->kn_sdata = kev->data; 1872 } 1873 1874 done_ev_add: 1875 /* 1876 * We can get here with kn->kn_knlist == NULL. This can happen when 1877 * the initial attach event decides that the event is "completed" 1878 * already, e.g., filt_procattach() is called on a zombie process. It 1879 * will call filt_proc() which will remove it from the list, and NULL 1880 * kn_knlist. 1881 * 1882 * KN_DISABLED will be stable while the knote is in flux, so the 1883 * unlocked read will not race with an update. 
1884 */ 1885 if ((kn->kn_status & KN_DISABLED) == 0) 1886 event = kn->kn_fop->f_event(kn, 0); 1887 else 1888 event = 0; 1889 1890 KQ_LOCK(kq); 1891 if (event) 1892 kn->kn_status |= KN_ACTIVE; 1893 if ((kn->kn_status & (KN_ACTIVE | KN_DISABLED | KN_QUEUED)) == 1894 KN_ACTIVE) 1895 knote_enqueue(kn); 1896 kn->kn_status &= ~KN_SCAN; 1897 kn_leave_flux(kn); 1898 kn_list_unlock(knl); 1899 KQ_UNLOCK_FLUX(kq); 1900 1901 done: 1902 KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); 1903 if (filedesc_unlock) 1904 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1905 if (fp != NULL) 1906 fdrop(fp, td); 1907 knote_free(tkn); 1908 if (fops != NULL) 1909 kqueue_fo_release(filt); 1910 return (error); 1911 } 1912 1913 static int 1914 kqueue_acquire_ref(struct kqueue *kq) 1915 { 1916 KQ_LOCK(kq); 1917 if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) { 1918 KQ_UNLOCK(kq); 1919 return (EBADF); 1920 } 1921 kq->kq_refcnt++; 1922 KQ_UNLOCK(kq); 1923 return (0); 1924 } 1925 1926 static int 1927 kqueue_acquire(struct file *fp, struct kqueue **kqp) 1928 { 1929 struct kqueue *kq; 1930 int error; 1931 1932 kq = fp->f_data; 1933 if (fp->f_type != DTYPE_KQUEUE || kq == NULL) 1934 return (EINVAL); 1935 error = kqueue_acquire_ref(kq); 1936 if (error == 0) 1937 *kqp = kq; 1938 return (error); 1939 } 1940 1941 static void 1942 kqueue_release(struct kqueue *kq, int locked) 1943 { 1944 if (locked) 1945 KQ_OWNED(kq); 1946 else 1947 KQ_LOCK(kq); 1948 kq->kq_refcnt--; 1949 if (kq->kq_refcnt == 1) 1950 wakeup(&kq->kq_refcnt); 1951 if (!locked) 1952 KQ_UNLOCK(kq); 1953 } 1954 1955 static void 1956 ast_kqueue(struct thread *td, int tda __unused) 1957 { 1958 taskqueue_quiesce(taskqueue_kqueue_ctx); 1959 } 1960 1961 static void 1962 kqueue_schedtask(struct kqueue *kq) 1963 { 1964 KQ_OWNED(kq); 1965 KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN), 1966 ("scheduling kqueue task while draining")); 1967 1968 if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) { 1969 taskqueue_enqueue(taskqueue_kqueue_ctx, &kq->kq_task); 1970 kq->kq_state |= KQ_TASKSCHED; 1971 ast_sched(curthread, TDA_KQUEUE); 1972 } 1973 } 1974 1975 /* 1976 * Expand the kq to make sure we have storage for fops/ident pair. 1977 * 1978 * Return 0 on success (or no work necessary), return errno on failure. 1979 */ 1980 static int 1981 kqueue_expand(struct kqueue *kq, const struct filterops *fops, uintptr_t ident, 1982 int mflag) 1983 { 1984 struct klist *list, *tmp_knhash, *to_free; 1985 u_long tmp_knhashmask; 1986 int error, fd, size; 1987 1988 KQ_NOTOWNED(kq); 1989 1990 error = 0; 1991 to_free = NULL; 1992 if (fops->f_isfd) { 1993 fd = ident; 1994 if (kq->kq_knlistsize <= fd) { 1995 size = kq->kq_knlistsize; 1996 while (size <= fd) 1997 size += KQEXTENT; 1998 list = malloc(size * sizeof(*list), M_KQUEUE, mflag); 1999 if (list == NULL) 2000 return ENOMEM; 2001 KQ_LOCK(kq); 2002 if ((kq->kq_state & KQ_CLOSING) != 0) { 2003 to_free = list; 2004 error = EBADF; 2005 } else if (kq->kq_knlistsize > fd) { 2006 to_free = list; 2007 } else { 2008 if (kq->kq_knlist != NULL) { 2009 bcopy(kq->kq_knlist, list, 2010 kq->kq_knlistsize * sizeof(*list)); 2011 to_free = kq->kq_knlist; 2012 kq->kq_knlist = NULL; 2013 } 2014 bzero((caddr_t)list + 2015 kq->kq_knlistsize * sizeof(*list), 2016 (size - kq->kq_knlistsize) * sizeof(*list)); 2017 kq->kq_knlistsize = size; 2018 kq->kq_knlist = list; 2019 } 2020 KQ_UNLOCK(kq); 2021 } 2022 } else { 2023 if (kq->kq_knhashmask == 0) { 2024 tmp_knhash = hashinit_flags(KN_HASHSIZE, M_KQUEUE, 2025 &tmp_knhashmask, (mflag & M_WAITOK) != 0 ? 
2026 HASH_WAITOK : HASH_NOWAIT); 2027 if (tmp_knhash == NULL) 2028 return (ENOMEM); 2029 KQ_LOCK(kq); 2030 if ((kq->kq_state & KQ_CLOSING) != 0) { 2031 to_free = tmp_knhash; 2032 error = EBADF; 2033 } else if (kq->kq_knhashmask == 0) { 2034 kq->kq_knhash = tmp_knhash; 2035 kq->kq_knhashmask = tmp_knhashmask; 2036 } else { 2037 to_free = tmp_knhash; 2038 } 2039 KQ_UNLOCK(kq); 2040 } 2041 } 2042 free(to_free, M_KQUEUE); 2043 2044 KQ_NOTOWNED(kq); 2045 return (error); 2046 } 2047 2048 static void 2049 kqueue_task(void *arg, int pending) 2050 { 2051 struct kqueue *kq; 2052 int haskqglobal; 2053 2054 haskqglobal = 0; 2055 kq = arg; 2056 2057 KQ_GLOBAL_LOCK(&kq_global, haskqglobal); 2058 KQ_LOCK(kq); 2059 2060 KNOTE_LOCKED(&kq->kq_sel.si_note, 0); 2061 2062 kq->kq_state &= ~KQ_TASKSCHED; 2063 if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) { 2064 wakeup(&kq->kq_state); 2065 } 2066 KQ_UNLOCK(kq); 2067 KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); 2068 } 2069 2070 /* 2071 * Scan, update kn_data (if not ONESHOT), and copyout triggered events. 2072 * We treat KN_MARKER knotes as if they are in flux. 2073 */ 2074 static int 2075 kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, 2076 const struct timespec *tsp, struct kevent *keva, struct thread *td) 2077 { 2078 struct kevent *kevp; 2079 struct knote *kn, *marker; 2080 struct knlist *knl; 2081 sbintime_t asbt, rsbt; 2082 int count, error, haskqglobal, influx, nkev, touch; 2083 2084 count = maxevents; 2085 nkev = 0; 2086 error = 0; 2087 haskqglobal = 0; 2088 2089 if (maxevents == 0) 2090 goto done_nl; 2091 if (maxevents < 0) { 2092 error = EINVAL; 2093 goto done_nl; 2094 } 2095 2096 rsbt = 0; 2097 if (tsp != NULL) { 2098 if (!timespecvalid_interval(tsp)) { 2099 error = EINVAL; 2100 goto done_nl; 2101 } 2102 if (timespecisset(tsp)) { 2103 if (tsp->tv_sec <= INT32_MAX) { 2104 rsbt = tstosbt(*tsp); 2105 if (TIMESEL(&asbt, rsbt)) 2106 asbt += tc_tick_sbt; 2107 if (asbt <= SBT_MAX - rsbt) 2108 asbt += rsbt; 2109 else 2110 asbt = 0; 2111 rsbt >>= tc_precexp; 2112 } else 2113 asbt = 0; 2114 } else 2115 asbt = -1; 2116 } else 2117 asbt = 0; 2118 marker = knote_alloc(M_WAITOK); 2119 marker->kn_status = KN_MARKER; 2120 KQ_LOCK(kq); 2121 2122 retry: 2123 kevp = keva; 2124 if (kq->kq_count == 0) { 2125 if (asbt == -1) { 2126 error = EWOULDBLOCK; 2127 } else { 2128 kq->kq_state |= KQ_SLEEP; 2129 error = msleep_sbt(kq, &kq->kq_lock, PSOCK | PCATCH, 2130 "kqread", asbt, rsbt, C_ABSOLUTE); 2131 } 2132 if (error == 0) 2133 goto retry; 2134 /* don't restart after signals... 
*/ 2135 if (error == ERESTART) 2136 error = EINTR; 2137 else if (error == EWOULDBLOCK) 2138 error = 0; 2139 goto done; 2140 } 2141 2142 TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe); 2143 influx = 0; 2144 while (count) { 2145 KQ_OWNED(kq); 2146 kn = TAILQ_FIRST(&kq->kq_head); 2147 2148 if ((kn->kn_status == KN_MARKER && kn != marker) || 2149 kn_in_flux(kn)) { 2150 if (influx) { 2151 influx = 0; 2152 KQ_FLUX_WAKEUP(kq); 2153 } 2154 kq->kq_state |= KQ_FLUXWAIT; 2155 error = msleep(kq, &kq->kq_lock, PSOCK, 2156 "kqflxwt", 0); 2157 continue; 2158 } 2159 2160 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 2161 if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) { 2162 kn->kn_status &= ~KN_QUEUED; 2163 kq->kq_count--; 2164 continue; 2165 } 2166 if (kn == marker) { 2167 KQ_FLUX_WAKEUP(kq); 2168 if (count == maxevents) 2169 goto retry; 2170 goto done; 2171 } 2172 KASSERT(!kn_in_flux(kn), 2173 ("knote %p is unexpectedly in flux", kn)); 2174 2175 if ((kn->kn_flags & EV_DROP) == EV_DROP) { 2176 kn->kn_status &= ~KN_QUEUED; 2177 kn_enter_flux(kn); 2178 kq->kq_count--; 2179 KQ_UNLOCK(kq); 2180 /* 2181 * We don't need to lock the list since we've 2182 * marked it as in flux. 2183 */ 2184 knote_drop(kn, td); 2185 KQ_LOCK(kq); 2186 continue; 2187 } else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) { 2188 kn->kn_status &= ~KN_QUEUED; 2189 kn_enter_flux(kn); 2190 kq->kq_count--; 2191 KQ_UNLOCK(kq); 2192 /* 2193 * We don't need to lock the list since we've 2194 * marked the knote as being in flux. 2195 */ 2196 *kevp = kn->kn_kevent; 2197 knote_drop(kn, td); 2198 KQ_LOCK(kq); 2199 kn = NULL; 2200 } else { 2201 kn->kn_status |= KN_SCAN; 2202 kn_enter_flux(kn); 2203 KQ_UNLOCK(kq); 2204 if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) 2205 KQ_GLOBAL_LOCK(&kq_global, haskqglobal); 2206 knl = kn_list_lock(kn); 2207 if (kn->kn_fop->f_event(kn, 0) == 0) { 2208 KQ_LOCK(kq); 2209 KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); 2210 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | 2211 KN_SCAN); 2212 kn_leave_flux(kn); 2213 kq->kq_count--; 2214 kn_list_unlock(knl); 2215 influx = 1; 2216 continue; 2217 } 2218 touch = (!kn->kn_fop->f_isfd && 2219 kn->kn_fop->f_touch != NULL); 2220 if (touch) 2221 kn->kn_fop->f_touch(kn, kevp, EVENT_PROCESS); 2222 else 2223 *kevp = kn->kn_kevent; 2224 KQ_LOCK(kq); 2225 KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); 2226 if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { 2227 /* 2228 * Manually clear knotes who weren't 2229 * 'touch'ed. 
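 * A filter that provides an f_touch method is expected to have
 * refreshed or cleared its own state in the EVENT_PROCESS call above,
 * so only un-touched knotes are cleared manually here.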
2230 */
2231 if (touch == 0 && kn->kn_flags & EV_CLEAR) {
2232 kn->kn_data = 0;
2233 kn->kn_fflags = 0;
2234 }
2235 if (kn->kn_flags & EV_DISPATCH)
2236 kn->kn_status |= KN_DISABLED;
2237 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
2238 kq->kq_count--;
2239 } else
2240 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
2241
2242 kn->kn_status &= ~KN_SCAN;
2243 kn_leave_flux(kn);
2244 kn_list_unlock(knl);
2245 influx = 1;
2246 }
2247
2248 /* we are returning a copy to the user */
2249 kevp++;
2250 nkev++;
2251 count--;
2252
2253 if (nkev == KQ_NEVENTS) {
2254 influx = 0;
2255 KQ_UNLOCK_FLUX(kq);
2256 error = k_ops->k_copyout(k_ops->arg, keva, nkev);
2257 nkev = 0;
2258 kevp = keva;
2259 KQ_LOCK(kq);
2260 if (error)
2261 break;
2262 }
2263 }
2264 TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe);
2265 done:
2266 KQ_OWNED(kq);
2267 KQ_UNLOCK_FLUX(kq);
2268 knote_free(marker);
2269 done_nl:
2270 KQ_NOTOWNED(kq);
2271 if (nkev != 0)
2272 error = k_ops->k_copyout(k_ops->arg, keva, nkev);
2273 td->td_retval[0] = maxevents - count;
2274 return (error);
2275 }
2276
2277 /*ARGSUSED*/
2278 static int
2279 kqueue_ioctl(struct file *fp, u_long cmd, void *data,
2280 struct ucred *active_cred, struct thread *td)
2281 {
2282 /*
2283 * Enabling sigio causes two major problems:
2284 * 1) infinite recursion:
2285 * Synopsis: kevent is being used to track signals and has FIOASYNC
2286 * set. On receipt of a signal this will cause a kqueue to recurse
2287 * into itself over and over. Sending the sigio causes the kqueue
2288 * to become ready, which in turn posts sigio again, forever.
2289 * Solution: this can be solved by setting a flag in the kqueue that
2290 * we have a SIGIO in progress.
2291 * 2) locking problems:
2292 * Synopsis: Kqueue is a leaf subsystem, but adding signalling puts
2293 * us above the proc and pgrp locks.
2294 * Solution: Post a signal using an async mechanism, being sure to
2295 * record a generation count in the delivery so that we do not deliver
2296 * a signal to the wrong process.
2297 *
2298 * Note, these two mechanisms are somewhat mutually exclusive!
2299 */
2300 #if 0
2301 struct kqueue *kq;
2302
2303 kq = fp->f_data;
2304 switch (cmd) {
2305 case FIOASYNC:
2306 if (*(int *)data) {
2307 kq->kq_state |= KQ_ASYNC;
2308 } else {
2309 kq->kq_state &= ~KQ_ASYNC;
2310 }
2311 return (0);
2312
2313 case FIOSETOWN:
2314 return (fsetown(*(int *)data, &kq->kq_sigio));
2315
2316 case FIOGETOWN:
2317 *(int *)data = fgetown(&kq->kq_sigio);
2318 return (0);
2319 }
2320 #endif
2321
2322 return (ENOTTY);
2323 }
2324
2325 /*ARGSUSED*/
2326 static int
2327 kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
2328 struct thread *td)
2329 {
2330 struct kqueue *kq;
2331 int revents = 0;
2332 int error;
2333
2334 if ((error = kqueue_acquire(fp, &kq)))
2335 return POLLERR;
2336
2337 KQ_LOCK(kq);
2338 if (events & (POLLIN | POLLRDNORM)) {
2339 if (kq->kq_count) {
2340 revents |= events & (POLLIN | POLLRDNORM);
2341 } else {
2342 selrecord(td, &kq->kq_sel);
2343 if (SEL_WAITING(&kq->kq_sel))
2344 kq->kq_state |= KQ_SEL;
2345 }
2346 }
2347 kqueue_release(kq, 1);
2348 KQ_UNLOCK(kq);
2349 return (revents);
2350 }
2351
2352 /*ARGSUSED*/
2353 static int
2354 kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
2355 {
2356
2357 bzero((void *)st, sizeof *st);
2358 /*
2359 * We no longer return kq_count because the unlocked value is useless.
2360 * If you spent all this time getting the count, why not spend your
2361 * syscall better by calling kevent?
2362 * 2363 * XXX - This is needed for libc_r. 2364 */ 2365 st->st_mode = S_IFIFO; 2366 return (0); 2367 } 2368 2369 static void 2370 kqueue_drain(struct kqueue *kq, struct thread *td) 2371 { 2372 struct knote *kn; 2373 int i; 2374 2375 KQ_LOCK(kq); 2376 2377 KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING, 2378 ("kqueue already closing")); 2379 kq->kq_state |= KQ_CLOSING; 2380 if (kq->kq_refcnt > 1) 2381 msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0); 2382 2383 KASSERT(kq->kq_refcnt == 1, ("other refs are out there!")); 2384 2385 KASSERT(knlist_empty(&kq->kq_sel.si_note), 2386 ("kqueue's knlist not empty")); 2387 2388 for (i = 0; i < kq->kq_knlistsize; i++) { 2389 while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) { 2390 if (kn_in_flux(kn)) { 2391 kq->kq_state |= KQ_FLUXWAIT; 2392 msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0); 2393 continue; 2394 } 2395 kn_enter_flux(kn); 2396 KQ_UNLOCK(kq); 2397 knote_drop(kn, td); 2398 KQ_LOCK(kq); 2399 } 2400 } 2401 if (kq->kq_knhashmask != 0) { 2402 for (i = 0; i <= kq->kq_knhashmask; i++) { 2403 while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) { 2404 if (kn_in_flux(kn)) { 2405 kq->kq_state |= KQ_FLUXWAIT; 2406 msleep(kq, &kq->kq_lock, PSOCK, 2407 "kqclo2", 0); 2408 continue; 2409 } 2410 kn_enter_flux(kn); 2411 KQ_UNLOCK(kq); 2412 knote_drop(kn, td); 2413 KQ_LOCK(kq); 2414 } 2415 } 2416 } 2417 2418 if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) { 2419 kq->kq_state |= KQ_TASKDRAIN; 2420 msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0); 2421 } 2422 2423 if ((kq->kq_state & KQ_SEL) == KQ_SEL) { 2424 selwakeuppri(&kq->kq_sel, PSOCK); 2425 if (!SEL_WAITING(&kq->kq_sel)) 2426 kq->kq_state &= ~KQ_SEL; 2427 } 2428 2429 KQ_UNLOCK(kq); 2430 } 2431 2432 static void 2433 kqueue_destroy(struct kqueue *kq) 2434 { 2435 2436 KASSERT(kq->kq_fdp == NULL, 2437 ("kqueue still attached to a file descriptor")); 2438 seldrain(&kq->kq_sel); 2439 knlist_destroy(&kq->kq_sel.si_note); 2440 mtx_destroy(&kq->kq_lock); 2441 2442 if (kq->kq_knhash != NULL) 2443 free(kq->kq_knhash, M_KQUEUE); 2444 if (kq->kq_knlist != NULL) 2445 free(kq->kq_knlist, M_KQUEUE); 2446 2447 funsetown(&kq->kq_sigio); 2448 } 2449 2450 /*ARGSUSED*/ 2451 static int 2452 kqueue_close(struct file *fp, struct thread *td) 2453 { 2454 struct kqueue *kq = fp->f_data; 2455 struct filedesc *fdp; 2456 int error; 2457 int filedesc_unlock; 2458 2459 if ((error = kqueue_acquire(fp, &kq))) 2460 return error; 2461 kqueue_drain(kq, td); 2462 2463 /* 2464 * We could be called due to the knote_drop() doing fdrop(), 2465 * called from kqueue_register(). In this case the global 2466 * lock is owned, and filedesc sx is locked before, to not 2467 * take the sleepable lock after non-sleepable. 
2468 */
2469 fdp = kq->kq_fdp;
2470 kq->kq_fdp = NULL;
2471 if (!sx_xlocked(FILEDESC_LOCK(fdp))) {
2472 FILEDESC_XLOCK(fdp);
2473 filedesc_unlock = 1;
2474 } else
2475 filedesc_unlock = 0;
2476 TAILQ_REMOVE(&fdp->fd_kqlist, kq, kq_list);
2477 if (filedesc_unlock)
2478 FILEDESC_XUNLOCK(fdp);
2479
2480 kqueue_destroy(kq);
2481 chgkqcnt(kq->kq_cred->cr_ruidinfo, -1, 0);
2482 crfree(kq->kq_cred);
2483 free(kq, M_KQUEUE);
2484 fp->f_data = NULL;
2485
2486 return (0);
2487 }
2488
2489 static int
2490 kqueue_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
2491 {
2492 struct kqueue *kq = fp->f_data;
2493
2494 kif->kf_type = KF_TYPE_KQUEUE;
2495 kif->kf_un.kf_kqueue.kf_kqueue_addr = (uintptr_t)kq;
2496 kif->kf_un.kf_kqueue.kf_kqueue_count = kq->kq_count;
2497 kif->kf_un.kf_kqueue.kf_kqueue_state = kq->kq_state;
2498 return (0);
2499 }
2500
2501 static void
2502 kqueue_wakeup(struct kqueue *kq)
2503 {
2504 KQ_OWNED(kq);
2505
2506 if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) {
2507 kq->kq_state &= ~KQ_SLEEP;
2508 wakeup(kq);
2509 }
2510 if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
2511 selwakeuppri(&kq->kq_sel, PSOCK);
2512 if (!SEL_WAITING(&kq->kq_sel))
2513 kq->kq_state &= ~KQ_SEL;
2514 }
2515 if (!knlist_empty(&kq->kq_sel.si_note))
2516 kqueue_schedtask(kq);
2517 if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) {
2518 pgsigio(&kq->kq_sigio, SIGIO, 0);
2519 }
2520 }
2521
2522 /*
2523 * Walk down a list of knotes, activating them if their event has triggered.
2524 *
2525 * There is a possibility to optimize in the case of one kq watching another.
2526 * Instead of scheduling a task to wake it up, you could pass enough state
2527 * down the chain to wake up the parent kqueue. Make this code functional
2528 * first.
2529 */
2530 void
2531 knote(struct knlist *list, long hint, int lockflags)
2532 {
2533 struct kqueue *kq;
2534 struct knote *kn, *tkn;
2535 int error;
2536
2537 if (list == NULL)
2538 return;
2539
2540 KNL_ASSERT_LOCK(list, lockflags & KNF_LISTLOCKED);
2541
2542 if ((lockflags & KNF_LISTLOCKED) == 0)
2543 list->kl_lock(list->kl_lockarg);
2544
2545 /*
2546 * If we unlock the list lock (and enter influx), we can
2547 * eliminate the kqueue scheduling, but this will introduce
2548 * four lock/unlock's for each knote to test. Also, marker
2549 * would be needed to keep iteration position, since filters
2550 * or other threads could remove events.
2551 */
2552 SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) {
2553 kq = kn->kn_kq;
2554 KQ_LOCK(kq);
2555 if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) {
2556 /*
2557 * Do not process the in-flux knotes, except for
2558 * the influx coming from the kq unlock in the
2559 * kqueue_scan(). In the latter case, we do
2560 * not interfere with the scan, since the code
2561 * fragment in kqueue_scan() locks the knlist,
2562 * and cannot proceed until we finished.
2563 */ 2564 KQ_UNLOCK(kq); 2565 } else if ((lockflags & KNF_NOKQLOCK) != 0) { 2566 kn_enter_flux(kn); 2567 KQ_UNLOCK(kq); 2568 error = kn->kn_fop->f_event(kn, hint); 2569 KQ_LOCK(kq); 2570 kn_leave_flux(kn); 2571 if (error) 2572 KNOTE_ACTIVATE(kn, 1); 2573 KQ_UNLOCK_FLUX(kq); 2574 } else { 2575 if (kn->kn_fop->f_event(kn, hint)) 2576 KNOTE_ACTIVATE(kn, 1); 2577 KQ_UNLOCK(kq); 2578 } 2579 } 2580 if ((lockflags & KNF_LISTLOCKED) == 0) 2581 list->kl_unlock(list->kl_lockarg); 2582 } 2583 2584 /* 2585 * add a knote to a knlist 2586 */ 2587 void 2588 knlist_add(struct knlist *knl, struct knote *kn, int islocked) 2589 { 2590 2591 KNL_ASSERT_LOCK(knl, islocked); 2592 KQ_NOTOWNED(kn->kn_kq); 2593 KASSERT(kn_in_flux(kn), ("knote %p not in flux", kn)); 2594 KASSERT((kn->kn_status & KN_DETACHED) != 0, 2595 ("knote %p was not detached", kn)); 2596 if (!islocked) 2597 knl->kl_lock(knl->kl_lockarg); 2598 SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext); 2599 if (!islocked) 2600 knl->kl_unlock(knl->kl_lockarg); 2601 KQ_LOCK(kn->kn_kq); 2602 kn->kn_knlist = knl; 2603 kn->kn_status &= ~KN_DETACHED; 2604 KQ_UNLOCK(kn->kn_kq); 2605 } 2606 2607 static void 2608 knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, 2609 int kqislocked) 2610 { 2611 2612 KASSERT(!kqislocked || knlislocked, ("kq locked w/o knl locked")); 2613 KNL_ASSERT_LOCK(knl, knlislocked); 2614 mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED); 2615 KASSERT(kqislocked || kn_in_flux(kn), ("knote %p not in flux", kn)); 2616 KASSERT((kn->kn_status & KN_DETACHED) == 0, 2617 ("knote %p was already detached", kn)); 2618 if (!knlislocked) 2619 knl->kl_lock(knl->kl_lockarg); 2620 SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext); 2621 kn->kn_knlist = NULL; 2622 if (!knlislocked) 2623 kn_list_unlock(knl); 2624 if (!kqislocked) 2625 KQ_LOCK(kn->kn_kq); 2626 kn->kn_status |= KN_DETACHED; 2627 if (!kqislocked) 2628 KQ_UNLOCK(kn->kn_kq); 2629 } 2630 2631 /* 2632 * remove knote from the specified knlist 2633 */ 2634 void 2635 knlist_remove(struct knlist *knl, struct knote *kn, int islocked) 2636 { 2637 2638 knlist_remove_kq(knl, kn, islocked, 0); 2639 } 2640 2641 int 2642 knlist_empty(struct knlist *knl) 2643 { 2644 2645 KNL_ASSERT_LOCKED(knl); 2646 return (SLIST_EMPTY(&knl->kl_list)); 2647 } 2648 2649 static struct mtx knlist_lock; 2650 MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects", 2651 MTX_DEF); 2652 static void knlist_mtx_lock(void *arg); 2653 static void knlist_mtx_unlock(void *arg); 2654 2655 static void 2656 knlist_mtx_lock(void *arg) 2657 { 2658 2659 mtx_lock((struct mtx *)arg); 2660 } 2661 2662 static void 2663 knlist_mtx_unlock(void *arg) 2664 { 2665 2666 mtx_unlock((struct mtx *)arg); 2667 } 2668 2669 static void 2670 knlist_mtx_assert_lock(void *arg, int what) 2671 { 2672 2673 if (what == LA_LOCKED) 2674 mtx_assert((struct mtx *)arg, MA_OWNED); 2675 else 2676 mtx_assert((struct mtx *)arg, MA_NOTOWNED); 2677 } 2678 2679 void 2680 knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), 2681 void (*kl_unlock)(void *), 2682 void (*kl_assert_lock)(void *, int)) 2683 { 2684 2685 if (lock == NULL) 2686 knl->kl_lockarg = &knlist_lock; 2687 else 2688 knl->kl_lockarg = lock; 2689 2690 if (kl_lock == NULL) 2691 knl->kl_lock = knlist_mtx_lock; 2692 else 2693 knl->kl_lock = kl_lock; 2694 if (kl_unlock == NULL) 2695 knl->kl_unlock = knlist_mtx_unlock; 2696 else 2697 knl->kl_unlock = kl_unlock; 2698 if (kl_assert_lock == NULL) 2699 knl->kl_assert_lock = knlist_mtx_assert_lock; 2700 
else 2701 knl->kl_assert_lock = kl_assert_lock; 2702 2703 knl->kl_autodestroy = 0; 2704 SLIST_INIT(&knl->kl_list); 2705 } 2706 2707 void 2708 knlist_init_mtx(struct knlist *knl, struct mtx *lock) 2709 { 2710 2711 knlist_init(knl, lock, NULL, NULL, NULL); 2712 } 2713 2714 struct knlist * 2715 knlist_alloc(struct mtx *lock) 2716 { 2717 struct knlist *knl; 2718 2719 knl = malloc(sizeof(struct knlist), M_KQUEUE, M_WAITOK); 2720 knlist_init_mtx(knl, lock); 2721 return (knl); 2722 } 2723 2724 void 2725 knlist_destroy(struct knlist *knl) 2726 { 2727 2728 KASSERT(KNLIST_EMPTY(knl), 2729 ("destroying knlist %p with knotes on it", knl)); 2730 } 2731 2732 void 2733 knlist_detach(struct knlist *knl) 2734 { 2735 2736 KNL_ASSERT_LOCKED(knl); 2737 knl->kl_autodestroy = 1; 2738 if (knlist_empty(knl)) { 2739 knlist_destroy(knl); 2740 free(knl, M_KQUEUE); 2741 } 2742 } 2743 2744 /* 2745 * Even if we are locked, we may need to drop the lock to allow any influx 2746 * knotes time to "settle". 2747 */ 2748 void 2749 knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn) 2750 { 2751 struct knote *kn, *kn2; 2752 struct kqueue *kq; 2753 2754 KASSERT(!knl->kl_autodestroy, ("cleardel for autodestroy %p", knl)); 2755 if (islocked) 2756 KNL_ASSERT_LOCKED(knl); 2757 else { 2758 KNL_ASSERT_UNLOCKED(knl); 2759 again: /* need to reacquire lock since we have dropped it */ 2760 knl->kl_lock(knl->kl_lockarg); 2761 } 2762 2763 SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) { 2764 kq = kn->kn_kq; 2765 KQ_LOCK(kq); 2766 if (kn_in_flux(kn)) { 2767 KQ_UNLOCK(kq); 2768 continue; 2769 } 2770 knlist_remove_kq(knl, kn, 1, 1); 2771 if (killkn) { 2772 kn_enter_flux(kn); 2773 KQ_UNLOCK(kq); 2774 knote_drop_detached(kn, td); 2775 } else { 2776 /* Make sure cleared knotes disappear soon */ 2777 kn->kn_flags |= EV_EOF | EV_ONESHOT; 2778 KQ_UNLOCK(kq); 2779 } 2780 kq = NULL; 2781 } 2782 2783 if (!SLIST_EMPTY(&knl->kl_list)) { 2784 /* there are still in flux knotes remaining */ 2785 kn = SLIST_FIRST(&knl->kl_list); 2786 kq = kn->kn_kq; 2787 KQ_LOCK(kq); 2788 KASSERT(kn_in_flux(kn), ("knote removed w/o list lock")); 2789 knl->kl_unlock(knl->kl_lockarg); 2790 kq->kq_state |= KQ_FLUXWAIT; 2791 msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0); 2792 kq = NULL; 2793 goto again; 2794 } 2795 2796 if (islocked) 2797 KNL_ASSERT_LOCKED(knl); 2798 else { 2799 knl->kl_unlock(knl->kl_lockarg); 2800 KNL_ASSERT_UNLOCKED(knl); 2801 } 2802 } 2803 2804 /* 2805 * Remove all knotes referencing a specified fd must be called with FILEDESC 2806 * lock. This prevents a race where a new fd comes along and occupies the 2807 * entry and we attach a knote to the fd. 2808 */ 2809 void 2810 knote_fdclose(struct thread *td, int fd) 2811 { 2812 struct filedesc *fdp = td->td_proc->p_fd; 2813 struct kqueue *kq; 2814 struct knote *kn; 2815 int influx; 2816 2817 FILEDESC_XLOCK_ASSERT(fdp); 2818 2819 /* 2820 * We shouldn't have to worry about new kevents appearing on fd 2821 * since filedesc is locked. 
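 * If a knote on the fd's list is found in flux it is owned by a
 * concurrent scan or drop; sleep on the kqueue ("kqflxwt") and restart
 * from the head of the list once it has settled.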
2822 */ 2823 TAILQ_FOREACH(kq, &fdp->fd_kqlist, kq_list) { 2824 KQ_LOCK(kq); 2825 2826 again: 2827 influx = 0; 2828 while (kq->kq_knlistsize > fd && 2829 (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) { 2830 if (kn_in_flux(kn)) { 2831 /* someone else might be waiting on our knote */ 2832 if (influx) 2833 wakeup(kq); 2834 kq->kq_state |= KQ_FLUXWAIT; 2835 msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); 2836 goto again; 2837 } 2838 kn_enter_flux(kn); 2839 KQ_UNLOCK(kq); 2840 influx = 1; 2841 knote_drop(kn, td); 2842 KQ_LOCK(kq); 2843 } 2844 KQ_UNLOCK_FLUX(kq); 2845 } 2846 } 2847 2848 static int 2849 knote_attach(struct knote *kn, struct kqueue *kq) 2850 { 2851 struct klist *list; 2852 2853 KASSERT(kn_in_flux(kn), ("knote %p not marked influx", kn)); 2854 KQ_OWNED(kq); 2855 2856 if ((kq->kq_state & KQ_CLOSING) != 0) 2857 return (EBADF); 2858 if (kn->kn_fop->f_isfd) { 2859 if (kn->kn_id >= kq->kq_knlistsize) 2860 return (ENOMEM); 2861 list = &kq->kq_knlist[kn->kn_id]; 2862 } else { 2863 if (kq->kq_knhash == NULL) 2864 return (ENOMEM); 2865 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 2866 } 2867 SLIST_INSERT_HEAD(list, kn, kn_link); 2868 return (0); 2869 } 2870 2871 static void 2872 knote_drop(struct knote *kn, struct thread *td) 2873 { 2874 2875 if ((kn->kn_status & KN_DETACHED) == 0) 2876 kn->kn_fop->f_detach(kn); 2877 knote_drop_detached(kn, td); 2878 } 2879 2880 static void 2881 knote_drop_detached(struct knote *kn, struct thread *td) 2882 { 2883 struct kqueue *kq; 2884 struct klist *list; 2885 2886 kq = kn->kn_kq; 2887 2888 KASSERT((kn->kn_status & KN_DETACHED) != 0, 2889 ("knote %p still attached", kn)); 2890 KQ_NOTOWNED(kq); 2891 2892 KQ_LOCK(kq); 2893 for (;;) { 2894 KASSERT(kn->kn_influx >= 1, 2895 ("knote_drop called on %p with influx %d", 2896 kn, kn->kn_influx)); 2897 if (kn->kn_influx == 1) 2898 break; 2899 kq->kq_state |= KQ_FLUXWAIT; 2900 msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); 2901 } 2902 2903 if (kn->kn_fop->f_isfd) 2904 list = &kq->kq_knlist[kn->kn_id]; 2905 else 2906 list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; 2907 2908 if (!SLIST_EMPTY(list)) 2909 SLIST_REMOVE(list, kn, knote, kn_link); 2910 if (kn->kn_status & KN_QUEUED) 2911 knote_dequeue(kn); 2912 KQ_UNLOCK_FLUX(kq); 2913 2914 if (kn->kn_fop->f_isfd) { 2915 fdrop(kn->kn_fp, td); 2916 kn->kn_fp = NULL; 2917 } 2918 kqueue_fo_release(kn->kn_kevent.filter); 2919 kn->kn_fop = NULL; 2920 knote_free(kn); 2921 } 2922 2923 static void 2924 knote_enqueue(struct knote *kn) 2925 { 2926 struct kqueue *kq = kn->kn_kq; 2927 2928 KQ_OWNED(kn->kn_kq); 2929 KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued")); 2930 2931 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 2932 kn->kn_status |= KN_QUEUED; 2933 kq->kq_count++; 2934 kqueue_wakeup(kq); 2935 } 2936 2937 static void 2938 knote_dequeue(struct knote *kn) 2939 { 2940 struct kqueue *kq = kn->kn_kq; 2941 2942 KQ_OWNED(kn->kn_kq); 2943 KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued")); 2944 2945 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 2946 kn->kn_status &= ~KN_QUEUED; 2947 kq->kq_count--; 2948 } 2949 2950 static void 2951 knote_init(void *dummy __unused) 2952 { 2953 2954 knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL, 2955 NULL, NULL, UMA_ALIGN_PTR, 0); 2956 ast_register(TDA_KQUEUE, ASTR_ASTF_REQUIRED, 0, ast_kqueue); 2957 prison0.pr_klist = knlist_alloc(&prison0.pr_mtx); 2958 } 2959 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL); 2960 2961 static struct knote * 2962 knote_alloc(int mflag) 2963 
{ 2964 2965 return (uma_zalloc(knote_zone, mflag | M_ZERO)); 2966 } 2967 2968 static void 2969 knote_free(struct knote *kn) 2970 { 2971 2972 uma_zfree(knote_zone, kn); 2973 } 2974 2975 /* 2976 * Register the kev w/ the kq specified by fd. 2977 */ 2978 int 2979 kqfd_register(int fd, struct kevent *kev, struct thread *td, int mflag) 2980 { 2981 struct kqueue *kq; 2982 struct file *fp; 2983 cap_rights_t rights; 2984 int error; 2985 2986 error = fget(td, fd, cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), 2987 &fp); 2988 if (error != 0) 2989 return (error); 2990 if ((error = kqueue_acquire(fp, &kq)) != 0) 2991 goto noacquire; 2992 2993 error = kqueue_register(kq, kev, td, mflag); 2994 kqueue_release(kq, 0); 2995 2996 noacquire: 2997 fdrop(fp, td); 2998 return (error); 2999 } 3000 3001 static int 3002 kqueue_fork_alloc(struct filedesc *fdp, struct file *fp, struct file **fp1, 3003 struct thread *td) 3004 { 3005 struct kqueue *kq, *kq1; 3006 int error; 3007 3008 MPASS(fp->f_type == DTYPE_KQUEUE); 3009 kq = fp->f_data; 3010 if ((kq->kq_state & KQ_CPONFORK) == 0) 3011 return (EOPNOTSUPP); 3012 error = kqueue_acquire_ref(kq); 3013 if (error != 0) 3014 return (error); 3015 error = kern_kqueue_alloc(td, fdp, NULL, fp1, 0, NULL, true, &kq1); 3016 if (error == 0) { 3017 kq1->kq_forksrc = kq; 3018 (*fp1)->f_flag = fp->f_flag & (FREAD | FWRITE | FEXEC | 3019 O_CLOEXEC | O_CLOFORK); 3020 } else { 3021 kqueue_release(kq, 0); 3022 } 3023 return (error); 3024 } 3025 3026 static void 3027 kqueue_fork_copy_knote(struct kqueue *kq1, struct knote *kn, struct proc *p1, 3028 struct filedesc *fdp) 3029 { 3030 struct knote *kn1; 3031 const struct filterops *fop; 3032 int error; 3033 3034 fop = kn->kn_fop; 3035 if (fop->f_copy == NULL || (fop->f_isfd && 3036 fdp->fd_files->fdt_ofiles[kn->kn_kevent.ident].fde_file == NULL)) 3037 return; 3038 error = kqueue_expand(kq1, fop, kn->kn_kevent.ident, M_WAITOK); 3039 if (error != 0) 3040 return; 3041 3042 kn1 = knote_alloc(M_WAITOK); 3043 *kn1 = *kn; 3044 kn1->kn_status |= KN_DETACHED; 3045 kn1->kn_status &= ~KN_QUEUED; 3046 kn1->kn_kq = kq1; 3047 error = fop->f_copy(kn1, p1); 3048 if (error != 0) { 3049 knote_free(kn1); 3050 return; 3051 } 3052 (void)kqueue_fo_find(kn->kn_kevent.filter); 3053 if (fop->f_isfd && !fhold(kn1->kn_fp)) { 3054 fop->f_detach(kn1); 3055 kqueue_fo_release(kn->kn_kevent.filter); 3056 knote_free(kn1); 3057 return; 3058 } 3059 if (kn->kn_knlist != NULL) 3060 knlist_add(kn->kn_knlist, kn1, 0); 3061 KQ_LOCK(kq1); 3062 knote_attach(kn1, kq1); 3063 kn1->kn_influx = 0; 3064 if ((kn->kn_status & KN_QUEUED) != 0) 3065 knote_enqueue(kn1); 3066 KQ_UNLOCK(kq1); 3067 } 3068 3069 static void 3070 kqueue_fork_copy_list(struct klist *knlist, struct knote *marker, 3071 struct kqueue *kq, struct kqueue *kq1, struct proc *p1, 3072 struct filedesc *fdp) 3073 { 3074 struct knote *kn; 3075 3076 KQ_OWNED(kq); 3077 kn = SLIST_FIRST(knlist); 3078 while (kn != NULL) { 3079 if ((kn->kn_status & KN_DETACHED) != 0 || 3080 (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0)) { 3081 kn = SLIST_NEXT(kn, kn_link); 3082 continue; 3083 } 3084 kn_enter_flux(kn); 3085 SLIST_INSERT_AFTER(kn, marker, kn_link); 3086 KQ_UNLOCK(kq); 3087 kqueue_fork_copy_knote(kq1, kn, p1, fdp); 3088 KQ_LOCK(kq); 3089 kn_leave_flux(kn); 3090 kn = SLIST_NEXT(marker, kn_link); 3091 /* XXXKIB switch kn_link to LIST? 
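 * (SLIST_REMOVE() below has to walk the list to locate the marker's
 * predecessor; a doubly-linked LIST would allow the marker to be
 * unlinked in constant time.)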
*/ 3092 SLIST_REMOVE(knlist, marker, knote, kn_link); 3093 } 3094 } 3095 3096 static int 3097 kqueue_fork_copy(struct filedesc *fdp, struct file *fp, struct file *fp1, 3098 struct proc *p1, struct thread *td) 3099 { 3100 struct kqueue *kq, *kq1; 3101 struct knote *marker; 3102 int error, i; 3103 3104 error = 0; 3105 MPASS(fp == NULL); 3106 MPASS(fp1->f_type == DTYPE_KQUEUE); 3107 3108 kq1 = fp1->f_data; 3109 kq = kq1->kq_forksrc; 3110 marker = knote_alloc(M_WAITOK); 3111 marker->kn_status = KN_MARKER; 3112 3113 KQ_LOCK(kq); 3114 for (i = 0; i < kq->kq_knlistsize; i++) { 3115 kqueue_fork_copy_list(&kq->kq_knlist[i], marker, kq, kq1, 3116 p1, fdp); 3117 } 3118 if (kq->kq_knhashmask != 0) { 3119 for (i = 0; i <= kq->kq_knhashmask; i++) { 3120 kqueue_fork_copy_list(&kq->kq_knhash[i], marker, kq, 3121 kq1, p1, fdp); 3122 } 3123 } 3124 kqueue_release(kq, 1); 3125 kq1->kq_forksrc = NULL; 3126 KQ_UNLOCK(kq); 3127 3128 knote_free(marker); 3129 return (error); 3130 } 3131 3132 static int 3133 kqueue_fork(struct filedesc *fdp, struct file *fp, struct file **fp1, 3134 struct proc *p1, struct thread *td) 3135 { 3136 if (*fp1 == NULL) 3137 return (kqueue_fork_alloc(fdp, fp, fp1, td)); 3138 return (kqueue_fork_copy(fdp, fp, *fp1, p1, td)); 3139 } 3140 3141 int 3142 knote_triv_copy(struct knote *kn __unused, struct proc *p1 __unused) 3143 { 3144 return (0); 3145 } 3146 3147 struct knote_status_export_bit { 3148 int kn_status_bit; 3149 int knt_status_bit; 3150 }; 3151 3152 #define ST(name) \ 3153 { .kn_status_bit = KN_##name, .knt_status_bit = KNOTE_STATUS_##name } 3154 static const struct knote_status_export_bit knote_status_export_bits[] = { 3155 ST(ACTIVE), 3156 ST(QUEUED), 3157 ST(DISABLED), 3158 ST(DETACHED), 3159 ST(KQUEUE), 3160 }; 3161 #undef ST 3162 3163 static int 3164 knote_status_export(int kn_status) 3165 { 3166 const struct knote_status_export_bit *b; 3167 unsigned i; 3168 int res; 3169 3170 res = 0; 3171 for (i = 0; i < nitems(knote_status_export_bits); i++) { 3172 b = &knote_status_export_bits[i]; 3173 if ((kn_status & b->kn_status_bit) != 0) 3174 res |= b->knt_status_bit; 3175 } 3176 return (res); 3177 } 3178 3179 static int 3180 kern_proc_kqueue_report_one(struct sbuf *s, struct proc *p, 3181 int kq_fd, struct kqueue *kq, struct knote *kn, bool compat32 __unused) 3182 { 3183 struct kinfo_knote kin; 3184 #ifdef COMPAT_FREEBSD32 3185 struct kinfo_knote32 kin32; 3186 #endif 3187 int error; 3188 3189 if (kn->kn_status == KN_MARKER) 3190 return (0); 3191 3192 memset(&kin, 0, sizeof(kin)); 3193 kin.knt_kq_fd = kq_fd; 3194 memcpy(&kin.knt_event, &kn->kn_kevent, sizeof(struct kevent)); 3195 kin.knt_status = knote_status_export(kn->kn_status); 3196 kn_enter_flux(kn); 3197 KQ_UNLOCK_FLUX(kq); 3198 if (kn->kn_fop->f_userdump != NULL) 3199 (void)kn->kn_fop->f_userdump(p, kn, &kin); 3200 #ifdef COMPAT_FREEBSD32 3201 if (compat32) { 3202 freebsd32_kinfo_knote_to_32(&kin, &kin32); 3203 error = sbuf_bcat(s, &kin32, sizeof(kin32)); 3204 } else 3205 #endif 3206 error = sbuf_bcat(s, &kin, sizeof(kin)); 3207 KQ_LOCK(kq); 3208 kn_leave_flux(kn); 3209 return (error); 3210 } 3211 3212 static int 3213 kern_proc_kqueue_report(struct sbuf *s, struct proc *p, int kq_fd, 3214 struct kqueue *kq, bool compat32) 3215 { 3216 struct knote *kn; 3217 int error, i; 3218 3219 error = 0; 3220 KQ_LOCK(kq); 3221 for (i = 0; i < kq->kq_knlistsize; i++) { 3222 SLIST_FOREACH(kn, &kq->kq_knlist[i], kn_link) { 3223 error = kern_proc_kqueue_report_one(s, p, kq_fd, 3224 kq, kn, compat32); 3225 if (error != 0) 3226 goto out; 3227 } 
3228 } 3229 if (kq->kq_knhashmask == 0) 3230 goto out; 3231 for (i = 0; i <= kq->kq_knhashmask; i++) { 3232 SLIST_FOREACH(kn, &kq->kq_knhash[i], kn_link) { 3233 error = kern_proc_kqueue_report_one(s, p, kq_fd, 3234 kq, kn, compat32); 3235 if (error != 0) 3236 goto out; 3237 } 3238 } 3239 out: 3240 KQ_UNLOCK_FLUX(kq); 3241 return (error); 3242 } 3243 3244 struct kern_proc_kqueues_out1_cb_args { 3245 struct sbuf *s; 3246 bool compat32; 3247 }; 3248 3249 static int 3250 kern_proc_kqueues_out1_cb(struct proc *p, int fd, struct file *fp, void *arg) 3251 { 3252 struct kqueue *kq; 3253 struct kern_proc_kqueues_out1_cb_args *a; 3254 3255 if (fp->f_type != DTYPE_KQUEUE) 3256 return (0); 3257 a = arg; 3258 kq = fp->f_data; 3259 return (kern_proc_kqueue_report(a->s, p, fd, kq, a->compat32)); 3260 } 3261 3262 static int 3263 kern_proc_kqueues_out1(struct thread *td, struct proc *p, struct sbuf *s, 3264 bool compat32) 3265 { 3266 struct kern_proc_kqueues_out1_cb_args a; 3267 3268 a.s = s; 3269 a.compat32 = compat32; 3270 return (fget_remote_foreach(td, p, kern_proc_kqueues_out1_cb, &a)); 3271 } 3272 3273 int 3274 kern_proc_kqueues_out(struct proc *p, struct sbuf *sb, size_t maxlen, 3275 bool compat32) 3276 { 3277 struct sbuf *s, sm; 3278 size_t sb_len; 3279 int error; 3280 3281 if (maxlen == -1 || maxlen == 0) 3282 sb_len = 128; 3283 else 3284 sb_len = maxlen; 3285 s = sbuf_new(&sm, NULL, sb_len, maxlen == -1 ? SBUF_AUTOEXTEND : 3286 SBUF_FIXEDLEN); 3287 error = kern_proc_kqueues_out1(curthread, p, s, compat32); 3288 sbuf_finish(s); 3289 if (error == 0) { 3290 sbuf_bcat(sb, sbuf_data(s), MIN(sbuf_len(s), maxlen == -1 ? 3291 SIZE_T_MAX : maxlen)); 3292 } 3293 sbuf_delete(s); 3294 return (error); 3295 } 3296 3297 static int 3298 sysctl_kern_proc_kqueue_one(struct thread *td, struct sbuf *s, struct proc *p, 3299 int kq_fd, bool compat32) 3300 { 3301 struct file *fp; 3302 struct kqueue *kq; 3303 int error; 3304 3305 error = fget_remote(td, p, kq_fd, &fp); 3306 if (error == 0) { 3307 if (fp->f_type != DTYPE_KQUEUE) { 3308 error = EINVAL; 3309 } else { 3310 kq = fp->f_data; 3311 error = kern_proc_kqueue_report(s, p, kq_fd, kq, 3312 compat32); 3313 } 3314 fdrop(fp, td); 3315 } 3316 return (error); 3317 } 3318 3319 static int 3320 sysctl_kern_proc_kqueue(SYSCTL_HANDLER_ARGS) 3321 { 3322 struct thread *td; 3323 struct proc *p; 3324 struct sbuf *s, sm; 3325 int error, error1, *name; 3326 bool compat32; 3327 3328 name = (int *)arg1; 3329 if ((u_int)arg2 > 2 || (u_int)arg2 == 0) 3330 return (EINVAL); 3331 3332 error = pget((pid_t)name[0], PGET_HOLD | PGET_CANDEBUG, &p); 3333 if (error != 0) 3334 return (error); 3335 3336 td = curthread; 3337 #ifdef COMPAT_FREEBSD32 3338 compat32 = SV_CURPROC_FLAG(SV_ILP32); 3339 #else 3340 compat32 = false; 3341 #endif 3342 3343 s = sbuf_new_for_sysctl(&sm, NULL, 0, req); 3344 if (s == NULL) { 3345 error = ENOMEM; 3346 goto out; 3347 } 3348 sbuf_clear_flags(s, SBUF_INCLUDENUL); 3349 3350 if ((u_int)arg2 == 1) { 3351 error = kern_proc_kqueues_out1(td, p, s, compat32); 3352 } else { 3353 error = sysctl_kern_proc_kqueue_one(td, s, p, 3354 name[1] /* kq_fd */, compat32); 3355 } 3356 3357 error1 = sbuf_finish(s); 3358 if (error == 0) 3359 error = error1; 3360 sbuf_delete(s); 3361 3362 out: 3363 PRELE(p); 3364 return (error); 3365 } 3366 3367 static SYSCTL_NODE(_kern_proc, KERN_PROC_KQUEUE, kq, 3368 CTLFLAG_RD | CTLFLAG_MPSAFE, 3369 sysctl_kern_proc_kqueue, "KQueue events"); 3370
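/*
 * Example (an editor's illustrative sketch, not part of the original
 * sources): userland can read back the knotes of one kqueue of a target
 * process through the kern.proc.kq node defined above.  The MIB layout
 * is inferred from sysctl_kern_proc_kqueue(): name[0] is the target pid
 * and the optional name[1] selects a single kqueue file descriptor; the
 * handler returns an array of struct kinfo_knote (struct kinfo_knote32
 * for 32-bit callers).  "pid" and "kq_fd" below stand for values chosen
 * by the caller, the usual sysctl(3) and kinfo headers are assumed, and
 * error handling is abbreviated.
 *
 *	int mib[5] = { CTL_KERN, KERN_PROC, KERN_PROC_KQUEUE, pid, kq_fd };
 *	size_t len = 0;
 *	struct kinfo_knote *kn = NULL;
 *
 *	if (sysctl(mib, nitems(mib), NULL, &len, NULL, 0) == 0 &&
 *	    (kn = malloc(len)) != NULL &&
 *	    sysctl(mib, nitems(mib), kn, &len, NULL, 0) == 0) {
 *		for (size_t i = 0; i < len / sizeof(*kn); i++)
 *			printf("kq %d filter %d ident %ju status %#x\n",
 *			    kn[i].knt_kq_fd, (int)kn[i].knt_event.filter,
 *			    (uintmax_t)kn[i].knt_event.ident,
 *			    kn[i].knt_status);
 *	}
 *	free(kn);
 */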