/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/uio.h>

#include <vm/uma.h>

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

static int	kqueue_scan(struct file *fp, int maxevents,
		    struct kevent *ulistp, const struct timespec *timeout,
		    struct thread *td);
static void	kqueue_wakeup(struct kqueue *kq);

static fo_rdwr_t	kqueue_read;
static fo_rdwr_t	kqueue_write;
static fo_ioctl_t	kqueue_ioctl;
static fo_poll_t	kqueue_poll;
static fo_kqfilter_t	kqueue_kqfilter;
static fo_stat_t	kqueue_stat;
static fo_close_t	kqueue_close;

static struct fileops kqueueops = {
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_ioctl = kqueue_ioctl,
	.fo_poll = kqueue_poll,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_stat = kqueue_stat,
	.fo_close = kqueue_close,
};

static void	knote_attach(struct knote *kn, struct filedesc *fdp);
static void	knote_drop(struct knote *kn, struct thread *td);
static void	knote_enqueue(struct knote *kn);
static void	knote_dequeue(struct knote *kn);
static void	knote_init(void);
static struct	knote *knote_alloc(void);
static void	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);

static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static uma_zone_t	knote_zone;
static int		kq_ncallouts = 0;
static int		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");

#define KNOTE_ACTIVATE(kn) do {						\
	kn->kn_status |= KN_ACTIVE;					\
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue(kn);					\
} while(0)

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

static int
filt_nullattach(struct knote *kn)
{

	return (ENXIO);
}

struct filterops null_filtops =
	{ 0, filt_nullattach, NULL, NULL };

extern struct filterops sig_filtops;

/*
 * Table for all system-defined filters.
 */
static struct filterops *sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	&null_filtops,			/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
	&file_filtops,			/* EVFILT_NETDEV */
};

static int
filt_fileattach(struct knote *kn)
{

	return (fo_kqfilter(kn->kn_fp, kn));
}

/*ARGSUSED*/
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	kn->kn_fop = &kqread_filtops;
	SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
	return (0);
}

static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}
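
/*
 * Illustrative sketch, not part of the kernel: kqueue_kqfilter() and
 * filt_kqueue() above are what let one kqueue be monitored by another.
 * A userland program might register EVFILT_READ on a kqueue descriptor,
 * which becomes ready when that kqueue has pending events:
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, inner_kq, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(outer_kq, &kev, 1, NULL, 0, NULL);
 *
 * Here inner_kq and outer_kq are hypothetical descriptors returned by
 * two earlier kqueue() calls; kev.data on return reports the number of
 * pending events, as set by filt_kqueue().
 */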

static int
filt_procattach(struct knote *kn)
{
	struct proc *p;
	int immediate;
	int error;

	immediate = 0;
	p = pfind(kn->kn_id);
	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
		p = zpfind(kn->kn_id);
		immediate = 1;
	}
	if (p == NULL)
		return (ESRCH);
	if ((error = p_cansee(curthread, p))) {
		PROC_UNLOCK(p);
		return (error);
	}

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);

	/*
	 * Immediately activate any exit notes if the target process is a
	 * zombie.  This is necessary to handle the case where the target
	 * process, e.g. a child, dies before the kevent is registered.
	 */
	if (immediate && filt_proc(kn, NOTE_EXIT))
		KNOTE_ACTIVATE(kn);

	PROC_UNLOCK(p);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p = kn->kn_ptr.p_proc;

	if (kn->kn_status & KN_DETACHED)
		return;

	PROC_LOCK(p);
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
	PROC_UNLOCK(p);
}

static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.
	 */
	if (event == NOTE_EXIT) {
		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	/*
	 * Process will already be reported as gone.
	 * Do not report anything else, as the knote will be destroyed soon.
	 */
	if (kn->kn_status & KN_DETACHED)
		return (0);

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tv.tv_sec = kn->kn_sdata / 1000;
		tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
		tticks = tvtohz(&tv);
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset(calloutp, tticks, filt_timerexpire, kn);
	}
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	if (kq_ncallouts >= kq_calloutmax)
		return (ENOMEM);
	kq_ncallouts++;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	MALLOC(calloutp, struct callout *, sizeof(*calloutp),
	    M_KQUEUE, M_WAITOK);
	callout_init(calloutp, 0);
	callout_reset(calloutp, tticks, filt_timerexpire, kn);
	kn->kn_hook = calloutp;

	return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	callout_stop(calloutp);
	FREE(calloutp, M_KQUEUE);
	kq_ncallouts--;
}

static int
filt_timer(struct knote *kn, long hint)
{

	return (kn->kn_data != 0);
}

/*
 * MPSAFE
 */
int
kqueue(struct thread *td, struct kqueue_args *uap)
{
	struct filedesc *fdp;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	mtx_lock(&Giant);
	fdp = td->td_proc->p_fd;
	error = falloc(td, &fp, &fd);
	if (error)
		goto done2;
	/* An extra reference on `fp' has been held for us by falloc(). */
	kq = malloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO);
	TAILQ_INIT(&kq->kq_head);
	FILE_LOCK(fp);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	fp->f_data = kq;
	FILE_UNLOCK(fp);
	fdrop(fp, td);
	FILEDESC_LOCK(fdp);
	td->td_retval[0] = fd;
	if (fdp->fd_knlistsize < 0)
		fdp->fd_knlistsize = 0;		/* this process has a kq */
	FILEDESC_UNLOCK(fdp);
	kq->kq_fdp = fdp;
done2:
	mtx_unlock(&Giant);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct kevent_args {
	int	fd;
	const struct kevent *changelist;
	int	nchanges;
	struct	kevent *eventlist;
	int	nevents;
	const struct timespec *timeout;
};
#endif
/*
 * MPSAFE
 */
int
kevent(struct thread *td, struct kevent_args *uap)
{
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	int i, n, nerrors, error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_type != DTYPE_KQUEUE) {
		fdrop(fp, td);
		return (EBADF);
	}
	if (uap->timeout != NULL) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			goto done_nogiant;
		uap->timeout = &ts;
	}
	mtx_lock(&Giant);

	kq = fp->f_data;
	nerrors = 0;

	while (uap->nchanges > 0) {
		n = uap->nchanges > KQ_NEVENTS ? KQ_NEVENTS : uap->nchanges;
		error = copyin(uap->changelist, kq->kq_kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kq->kq_kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, td);
			if (error) {
				if (uap->nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					(void) copyout(kevp,
					    uap->eventlist,
					    sizeof(*kevp));
					uap->eventlist++;
					uap->nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		uap->nchanges -= n;
		uap->changelist += n;
	}
	if (nerrors) {
		td->td_retval[0] = nerrors;
		error = 0;
		goto done;
	}

	error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, td);
done:
	mtx_unlock(&Giant);
done_nogiant:
	if (fp != NULL)
		fdrop(fp, td);
	return (error);
}

int
kqueue_add_filteropts(int filt, struct filterops *filtops)
{

	if (filt > 0)
		panic("filt(%d) > 0", filt);
	if (filt + EVFILT_SYSCOUNT < 0)
		panic("filt(%d) + EVFILT_SYSCOUNT(%d) == %d < 0",
		    filt, EVFILT_SYSCOUNT, filt + EVFILT_SYSCOUNT);
	if (sysfilt_ops[~filt] != &null_filtops)
		panic("sysfilt_ops[~filt(%d)] != &null_filtops", filt);
	sysfilt_ops[~filt] = filtops;
	return (0);
}

int
kqueue_del_filteropts(int filt)
{

	if (filt > 0)
		panic("filt(%d) > 0", filt);
	if (filt + EVFILT_SYSCOUNT < 0)
		panic("filt(%d) + EVFILT_SYSCOUNT(%d) == %d < 0",
		    filt, EVFILT_SYSCOUNT, filt + EVFILT_SYSCOUNT);
	if (sysfilt_ops[~filt] == &null_filtops)
		panic("sysfilt_ops[~filt(%d)] == &null_filtops", filt);
	sysfilt_ops[~filt] = &null_filtops;
	return (0);
}
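
/*
 * Illustrative sketch, not part of this file: a subsystem with its own
 * filter plugs into the sysfilt_ops table through the interface above.
 * EVFILT_EXAMPLE and the example_* routines are hypothetical; filter
 * numbers are negative, hence the ~filt indexing.
 *
 *	static struct filterops example_filtops =
 *		{ 1, example_attach, example_detach, example_event };
 *
 *	kqueue_add_filteropts(EVFILT_EXAMPLE, &example_filtops);
 *
 * A matching kqueue_del_filteropts(EVFILT_EXAMPLE) restores null_filtops
 * when the subsystem goes away.
 */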

int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td)
{
	struct filedesc *fdp = kq->kq_fdp;
	struct filterops *fops;
	struct file *fp = NULL;
	struct knote *kn = NULL;
	int s, error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	} else {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		printf("unknown filter: %d\n", kev->filter);
		return (EINVAL);
	}

	FILEDESC_LOCK(fdp);
	if (fops->f_isfd) {
		/* validate descriptor */
		if ((u_int)kev->ident >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[kev->ident]) == NULL) {
			FILEDESC_UNLOCK(fdp);
			return (EBADF);
		}
		fhold(fp);

		if (kev->ident < fdp->fd_knlistsize) {
			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	} else {
		if (fdp->fd_knhashmask != 0) {
			struct klist *list;

			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}
	FILEDESC_UNLOCK(fdp);

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {

		if (kn == NULL) {
			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn, fdp);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn, td);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filter which has already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		s = splhigh();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		splx(s);

	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		s = splhigh();
		kn->kn_status |= KN_DISABLED;
		splx(s);
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		s = splhigh();
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		splx(s);
	}

done:
	if (fp != NULL)
		fdrop(fp, td);
	return (error);
}

static int
kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp,
	const struct timespec *tsp, struct thread *td)
{
	struct kqueue *kq;
	struct kevent *kevp;
	struct timeval atv, rtv, ttv;
	struct knote *kn, marker;
	int s, count, timeout, nkev = 0, error = 0;

	FILE_LOCK_ASSERT(fp, MA_NOTOWNED);

	kq = fp->f_data;
	count = maxevents;
	if (count == 0)
		goto done;

	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			timeout = -1;
		else
			timeout = atv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&atv);
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
		timeout = 0;
	}
	goto start;

retry:
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		timeout = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}

start:
	kevp = kq->kq_kev;
	s = splhigh();
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			kq->kq_state |= KQ_SLEEP;
			error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout);
		}
		splx(s);
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	/* mark the end of the queue, so we know when we have scanned it all */
	TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
	while (count) {
		kn = TAILQ_FIRST(&kq->kq_head);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if (kn == &marker) {
			splx(s);
			if (count == maxevents)
				goto retry;
			goto done;
		}
		if (kn->kn_status & KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
			continue;
		}
		*kevp = kn->kn_kevent;
		kevp++;
		nkev++;
		if (kn->kn_flags & EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			splx(s);
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			s = splhigh();
		} else if (kn->kn_flags & EV_CLEAR) {
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
		} else {
			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		}
		count--;
		if (nkev == KQ_NEVENTS) {
			splx(s);
			error = copyout(&kq->kq_kev, ulistp,
			    sizeof(struct kevent) * nkev);
			ulistp += nkev;
			nkev = 0;
			kevp = kq->kq_kev;
			s = splhigh();
			if (error)
				break;
		}
	}
	TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
	splx(s);
done:
	if (nkev != 0)
		error = copyout(&kq->kq_kev, ulistp,
		    sizeof(struct kevent) * nkev);
	td->td_retval[0] = maxevents - count;
	return (error);
}
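
/*
 * Illustrative sketch, not part of the kernel: the scan above is what a
 * blocking kevent(2) call drains.  A typical userland consumer loop
 * (handle_event is hypothetical) looks like:
 *
 *	struct kevent evlist[8];
 *	int i, n;
 *
 *	for (;;) {
 *		n = kevent(kq, NULL, 0, evlist, 8, NULL);
 *		if (n == -1)
 *			err(1, "kevent");
 *		for (i = 0; i < n; i++)
 *			handle_event(&evlist[i]);
 *	}
 *
 * With a NULL timeout the call sleeps in tsleep() above until
 * kqueue_wakeup() fires; a zero timespec maps to timeout = -1 and
 * returns immediately with whatever is pending.
 */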

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_ioctl(struct file *fp, u_long com, void *data,
	struct ucred *active_cred, struct thread *td)
{
	return (ENOTTY);
}

/*ARGSUSED*/
static int
kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
	struct thread *td)
{
	struct kqueue *kq;
	int revents = 0;
	int s = splnet();

	kq = fp->f_data;
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(td, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	splx(s);
	return (revents);
}

/*ARGSUSED*/
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
	struct thread *td)
{
	struct kqueue *kq;

	kq = fp->f_data;
	bzero((void *)st, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

/*ARGSUSED*/
static int
kqueue_close(struct file *fp, struct thread *td)
{
	struct kqueue *kq = fp->f_data;
	struct filedesc *fdp = kq->kq_fdp;
	struct knote **knp, *kn, *kn0;
	int i;

	FILEDESC_LOCK(fdp);
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
		kn = *knp;
		while (kn != NULL) {
			kn0 = SLIST_NEXT(kn, kn_link);
			if (kq == kn->kn_kq) {
				kn->kn_fop->f_detach(kn);
				*knp = kn0;
				FILE_LOCK(kn->kn_fp);
				FILEDESC_UNLOCK(fdp);
				fdrop_locked(kn->kn_fp, td);
				knote_free(kn);
				FILEDESC_LOCK(fdp);
			} else {
				knp = &SLIST_NEXT(kn, kn_link);
			}
			kn = kn0;
		}
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
			kn = *knp;
			while (kn != NULL) {
				kn0 = SLIST_NEXT(kn, kn_link);
				if (kq == kn->kn_kq) {
					kn->kn_fop->f_detach(kn);
					*knp = kn0;
					/* XXX non-fd release of kn->kn_ptr */
					FILEDESC_UNLOCK(fdp);
					knote_free(kn);
					FILEDESC_LOCK(fdp);
				} else {
					knp = &SLIST_NEXT(kn, kn_link);
				}
				kn = kn0;
			}
		}
	}
	FILEDESC_UNLOCK(fdp);
	free(kq, M_KQUEUE);
	fp->f_data = NULL;

	return (0);
}

static void
kqueue_wakeup(struct kqueue *kq)
{

	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);
	}
	KNOTE(&kq->kq_sel.si_note, 0);
}
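
/*
 * Illustrative sketch, not part of this file: the KNOTE() call in
 * kqueue_wakeup() above is the same hook every event source uses to
 * push activity into knote() below.  A hypothetical driver keeping a
 * struct klist sc_klist in its softc would announce readiness with:
 *
 *	KNOTE(&sc->sc_klist, 0);
 *
 * which walks the list and activates each knote whose filter routine
 * returns true for the given hint.
 */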

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext)
		if (kn->kn_fop->f_event(kn, hint))
			KNOTE_ACTIVATE(kn);
}

/*
 * remove all knotes from a specified klist
 */
void
knote_remove(struct thread *td, struct klist *list)
{
	struct knote *kn;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct thread *td, int fd)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct klist *list;

	FILEDESC_LOCK(fdp);
	list = &fdp->fd_knlist[fd];
	FILEDESC_UNLOCK(fdp);
	knote_remove(td, list);
}

static void
knote_attach(struct knote *kn, struct filedesc *fdp)
{
	struct klist *list, *tmp_knhash;
	u_long tmp_knhashmask;
	int size;

	FILEDESC_LOCK(fdp);

	if (! kn->kn_fop->f_isfd) {
		if (fdp->fd_knhashmask == 0) {
			FILEDESC_UNLOCK(fdp);
			tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
			    &tmp_knhashmask);
			FILEDESC_LOCK(fdp);
			if (fdp->fd_knhashmask == 0) {
				fdp->fd_knhash = tmp_knhash;
				fdp->fd_knhashmask = tmp_knhashmask;
			} else {
				free(tmp_knhash, M_KQUEUE);
			}
		}
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
		goto done;
	}

	if (fdp->fd_knlistsize <= kn->kn_id) {
		size = fdp->fd_knlistsize;
		while (size <= kn->kn_id)
			size += KQEXTENT;
		FILEDESC_UNLOCK(fdp);
		MALLOC(list, struct klist *,
		    size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
		FILEDESC_LOCK(fdp);
		if (fdp->fd_knlistsize > kn->kn_id) {
			FREE(list, M_KQUEUE);
			goto bigenough;
		}
		if (fdp->fd_knlist != NULL) {
			bcopy(fdp->fd_knlist, list,
			    fdp->fd_knlistsize * sizeof(struct klist *));
			FREE(fdp->fd_knlist, M_KQUEUE);
		}
		bzero((caddr_t)list +
		    fdp->fd_knlistsize * sizeof(struct klist *),
		    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
		fdp->fd_knlistsize = size;
		fdp->fd_knlist = list;
	}
bigenough:
	list = &fdp->fd_knlist[kn->kn_id];
done:
	FILEDESC_UNLOCK(fdp);
	SLIST_INSERT_HEAD(list, kn, kn_link);
	kn->kn_status = 0;
}

/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling fdrop and free.
 */
static void
knote_drop(struct knote *kn, struct thread *td)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct klist *list;

	FILEDESC_LOCK(fdp);
	if (kn->kn_fop->f_isfd)
		list = &fdp->fd_knlist[kn->kn_id];
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
	if (kn->kn_fop->f_isfd)
		FILE_LOCK(kn->kn_fp);
	FILEDESC_UNLOCK(fdp);

	SLIST_REMOVE(list, kn, knote, kn_link);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_fop->f_isfd)
		fdrop_locked(kn->kn_fp, td);
	knote_free(kn);
}

static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));

	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	splx(s);
	kqueue_wakeup(kq);
}

static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));

	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	splx(s);
}

static void
knote_init(void)
{
	knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

static struct knote *
knote_alloc(void)
{
	return ((struct knote *)uma_zalloc(knote_zone, M_WAITOK));
}

static void
knote_free(struct knote *kn)
{
	uma_zfree(knote_zone, kn);
}