/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/uio.h>

#include <vm/uma.h>

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

static int	kqueue_scan(struct file *fp, int maxevents,
		    struct kevent *ulistp, const struct timespec *timeout,
		    struct thread *td);
static void	kqueue_wakeup(struct kqueue *kq);

static fo_rdwr_t	kqueue_read;
static fo_rdwr_t	kqueue_write;
static fo_ioctl_t	kqueue_ioctl;
static fo_poll_t	kqueue_poll;
static fo_kqfilter_t	kqueue_kqfilter;
static fo_stat_t	kqueue_stat;
static fo_close_t	kqueue_close;

static struct fileops kqueueops = {
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_ioctl = kqueue_ioctl,
	.fo_poll = kqueue_poll,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_stat = kqueue_stat,
	.fo_close = kqueue_close,
};

static void	knote_attach(struct knote *kn, struct filedesc *fdp);
static void	knote_drop(struct knote *kn, struct thread *td);
static void	knote_enqueue(struct knote *kn);
static void	knote_dequeue(struct knote *kn);
static void	knote_init(void);
static struct	knote *knote_alloc(void);
static void	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);

static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static uma_zone_t	knote_zone;
static int		kq_ncallouts = 0;
static int		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");

#define KNOTE_ACTIVATE(kn) do {						\
	kn->kn_status |= KN_ACTIVE;					\
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue(kn);					\
} while (0)

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

static int
filt_nullattach(struct knote *kn)
{

	return (ENXIO);
}

struct filterops null_filtops =
	{ 0, filt_nullattach, NULL, NULL };

extern struct filterops sig_filtops;
extern struct filterops fs_filtops;

/*
 * Table for all system-defined filters.
 */
static struct filterops *sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	&null_filtops,			/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
	&file_filtops,			/* EVFILT_NETDEV */
	&fs_filtops,			/* EVFILT_FS */
};

static int
filt_fileattach(struct knote *kn)
{

	return (fo_kqfilter(kn->kn_fp, kn));
}

/*ARGSUSED*/
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	kn->kn_fop = &kqread_filtops;
	SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
	return (0);
}

static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}
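/*
 * Illustrative sketch (not part of the original file): because
 * kqueue_kqfilter() accepts EVFILT_READ on a kqueue descriptor, a
 * userland program can nest one kqueue inside another; filt_kqueue()
 * above then reports the inner queue's pending-event count in kn_data.
 * `inner_kq' and `outer_kq' are hypothetical descriptors returned by
 * kqueue(2):
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, inner_kq, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	if (kevent(outer_kq, &kev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 */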
static int
filt_procattach(struct knote *kn)
{
	struct proc *p;
	int immediate;
	int error;

	immediate = 0;
	p = pfind(kn->kn_id);
	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
		p = zpfind(kn->kn_id);
		immediate = 1;
	}
	if (p == NULL)
		return (ESRCH);
	if ((error = p_cansee(curthread, p))) {
		PROC_UNLOCK(p);
		return (error);
	}

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	if (immediate == 0)
		SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);

	/*
	 * Immediately activate any exit notes if the target process is a
	 * zombie.  This is necessary to handle the case where the target
	 * process, e.g. a child, dies before the kevent is registered.
	 */
	if (immediate && filt_proc(kn, NOTE_EXIT))
		KNOTE_ACTIVATE(kn);

	PROC_UNLOCK(p);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p = kn->kn_ptr.p_proc;

	if (kn->kn_status & KN_DETACHED)
		return;

	PROC_LOCK(p);
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
	PROC_UNLOCK(p);
}
static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.
	 */
	if (event == NOTE_EXIT) {
		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}
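/*
 * Illustrative sketch (not part of the original file): from userland,
 * the fork-tracking path above is exercised by registering a process
 * filter with NOTE_TRACK set.  `child_pid' and `kq' are hypothetical;
 * new descendants then report NOTE_CHILD events carrying the parent's
 * pid in kev.data, and registration failures surface as NOTE_TRACKERR:
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, child_pid, EVFILT_PROC, EV_ADD,
 *	    NOTE_EXIT | NOTE_FORK | NOTE_TRACK, 0, NULL);
 *	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 */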
static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tv.tv_sec = kn->kn_sdata / 1000;
		tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
		tticks = tvtohz(&tv);
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset(calloutp, tticks, filt_timerexpire, kn);
	}
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	if (kq_ncallouts >= kq_calloutmax)
		return (ENOMEM);
	kq_ncallouts++;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	MALLOC(calloutp, struct callout *, sizeof(*calloutp),
	    M_KQUEUE, M_WAITOK);
	callout_init(calloutp, 0);
	kn->kn_hook = calloutp;
	callout_reset(calloutp, tticks, filt_timerexpire, kn);

	return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	callout_drain(calloutp);
	FREE(calloutp, M_KQUEUE);
	kq_ncallouts--;
}

static int
filt_timer(struct knote *kn, long hint)
{

	return (kn->kn_data != 0);
}
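/*
 * Illustrative sketch (not part of the original file): the timer filter
 * is driven entirely from userland; data is the period in milliseconds,
 * as noted above.  Because EV_CLEAR is set automatically, kn_data counts
 * expirations since the event was last read.  A hypothetical one-second
 * periodic timer with identifier 1 on a kqueue descriptor `kq':
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD, 0, 1000, NULL);
 *	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 */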
/*
 * MPSAFE
 */
int
kqueue(struct thread *td, struct kqueue_args *uap)
{
	struct filedesc *fdp;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	mtx_lock(&Giant);
	fdp = td->td_proc->p_fd;
	error = falloc(td, &fp, &fd);
	if (error)
		goto done2;
	/* An extra reference on `fp' has been held for us by falloc(). */
	kq = malloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO);
	TAILQ_INIT(&kq->kq_head);
	FILE_LOCK(fp);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	fp->f_data = kq;
	FILE_UNLOCK(fp);
	fdrop(fp, td);
	FILEDESC_LOCK(fdp);
	td->td_retval[0] = fd;
	if (fdp->fd_knlistsize < 0)
		fdp->fd_knlistsize = 0;		/* this process has a kq */
	FILEDESC_UNLOCK(fdp);
	kq->kq_fdp = fdp;
done2:
	mtx_unlock(&Giant);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct kevent_args {
	int	fd;
	const struct kevent *changelist;
	int	nchanges;
	struct	kevent *eventlist;
	int	nevents;
	const struct timespec *timeout;
};
#endif
/*
 * MPSAFE
 */
int
kevent(struct thread *td, struct kevent_args *uap)
{
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	int i, n, nerrors, error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_type != DTYPE_KQUEUE) {
		fdrop(fp, td);
		return (EBADF);
	}
	if (uap->timeout != NULL) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			goto done_nogiant;
		uap->timeout = &ts;
	}
	mtx_lock(&Giant);

	kq = fp->f_data;
	nerrors = 0;

	while (uap->nchanges > 0) {
		n = uap->nchanges > KQ_NEVENTS ? KQ_NEVENTS : uap->nchanges;
		error = copyin(uap->changelist, kq->kq_kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kq->kq_kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, td);
			if (error) {
				if (uap->nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					(void) copyout(kevp,
					    uap->eventlist,
					    sizeof(*kevp));
					uap->eventlist++;
					uap->nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		uap->nchanges -= n;
		uap->changelist += n;
	}
	if (nerrors) {
		td->td_retval[0] = nerrors;
		error = 0;
		goto done;
	}

	error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, td);
done:
	mtx_unlock(&Giant);
done_nogiant:
	if (fp != NULL)
		fdrop(fp, td);
	return (error);
}

int
kqueue_add_filteropts(int filt, struct filterops *filtops)
{

	if (filt > 0)
		panic("filt(%d) > 0", filt);
	if (filt + EVFILT_SYSCOUNT < 0)
		panic("filt(%d) + EVFILT_SYSCOUNT(%d) == %d < 0",
		    filt, EVFILT_SYSCOUNT, filt + EVFILT_SYSCOUNT);
	if (sysfilt_ops[~filt] != &null_filtops)
		panic("sysfilt_ops[~filt(%d)] != &null_filtops", filt);
	sysfilt_ops[~filt] = filtops;
	return (0);
}

int
kqueue_del_filteropts(int filt)
{

	if (filt > 0)
		panic("filt(%d) > 0", filt);
	if (filt + EVFILT_SYSCOUNT < 0)
		panic("filt(%d) + EVFILT_SYSCOUNT(%d) == %d < 0",
		    filt, EVFILT_SYSCOUNT, filt + EVFILT_SYSCOUNT);
	if (sysfilt_ops[~filt] == &null_filtops)
		panic("sysfilt_ops[~filt(%d)] == &null_filtops", filt);
	sysfilt_ops[~filt] = &null_filtops;
	return (0);
}
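/*
 * Illustrative sketch (not part of the original file): a kernel module
 * can claim a filter slot that is still wired to null_filtops (here
 * EVFILT_AIO, per the sysfilt_ops table above) by supplying its own
 * filterops; the initializer order matches the file's own tables
 * (f_isfd, f_attach, f_detach, f_event).  The myfilt_* names are
 * hypothetical:
 *
 *	static int	myfilt_attach(struct knote *kn);
 *	static void	myfilt_detach(struct knote *kn);
 *	static int	myfilt_event(struct knote *kn, long hint);
 *
 *	static struct filterops my_filtops =
 *		{ 0, myfilt_attach, myfilt_detach, myfilt_event };
 *
 *	On module load:
 *		kqueue_add_filteropts(EVFILT_AIO, &my_filtops);
 *	On module unload:
 *		kqueue_del_filteropts(EVFILT_AIO);
 */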
int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td)
{
	struct filedesc *fdp = kq->kq_fdp;
	struct filterops *fops;
	struct file *fp = NULL;
	struct knote *kn = NULL;
	int s, error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	} else {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		printf("unknown filter: %d\n", kev->filter);
		return (EINVAL);
	}

	FILEDESC_LOCK(fdp);
	if (fops->f_isfd) {
		/* validate descriptor */
		if ((u_int)kev->ident >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[kev->ident]) == NULL) {
			FILEDESC_UNLOCK(fdp);
			return (EBADF);
		}
		fhold(fp);

		if (kev->ident < fdp->fd_knlistsize) {
			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	} else {
		if (fdp->fd_knhashmask != 0) {
			struct klist *list;

			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}
	FILEDESC_UNLOCK(fdp);

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {

		if (kn == NULL) {
			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn, fdp);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn, td);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filter which has already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		s = splhigh();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		splx(s);

	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		s = splhigh();
		kn->kn_status |= KN_DISABLED;
		splx(s);
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		s = splhigh();
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		splx(s);
	}

done:
	if (fp != NULL)
		fdrop(fp, td);
	return (error);
}

static int
kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp,
	const struct timespec *tsp, struct thread *td)
{
	struct kqueue *kq;
	struct kevent *kevp;
	struct timeval atv, rtv, ttv;
	struct knote *kn, marker;
	int s, count, timeout, nkev = 0, error = 0;

	FILE_LOCK_ASSERT(fp, MA_NOTOWNED);

	kq = fp->f_data;
	count = maxevents;
	if (count == 0)
		goto done;

	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			timeout = -1;
		else
			timeout = atv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&atv);
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
		timeout = 0;
	}
	goto start;

retry:
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		timeout = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}

start:
	kevp = kq->kq_kev;
	s = splhigh();
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			kq->kq_state |= KQ_SLEEP;
			error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout);
		}
		splx(s);
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
	while (count) {
		kn = TAILQ_FIRST(&kq->kq_head);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if (kn == &marker) {
			splx(s);
			if (count == maxevents)
				goto retry;
			goto done;
		}
		if (kn->kn_status & KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
			continue;
		}
		*kevp = kn->kn_kevent;
		kevp++;
		nkev++;
		if (kn->kn_flags & EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			splx(s);
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			s = splhigh();
		} else if (kn->kn_flags & EV_CLEAR) {
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
		} else {
			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		}
		count--;
		if (nkev == KQ_NEVENTS) {
			splx(s);
			error = copyout(&kq->kq_kev, ulistp,
			    sizeof(struct kevent) * nkev);
			ulistp += nkev;
			nkev = 0;
			kevp = kq->kq_kev;
			s = splhigh();
			if (error)
				break;
		}
	}
	TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
	splx(s);
done:
	if (nkev != 0)
		error = copyout(&kq->kq_kev, ulistp,
		    sizeof(struct kevent) * nkev);
	td->td_retval[0] = maxevents - count;
	return (error);
}
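/*
 * Illustrative sketch (not part of the original file): the timeout
 * handling above gives kevent(2) three userland behaviors.  A NULL
 * timespec blocks until an event arrives, a zero timespec polls
 * (timeout becomes -1 and the EWOULDBLOCK case returns at once with
 * zero events), and anything else bounds the sleep.  `kq' and
 * handle() below are hypothetical:
 *
 *	struct kevent ev[4];
 *	struct timespec ts = { 5, 0 };
 *	int i, n;
 *
 *	n = kevent(kq, NULL, 0, ev, 4, &ts);
 *	if (n == -1)
 *		err(1, "kevent");
 *	for (i = 0; i < n; i++)
 *		handle(&ev[i]);
 */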
/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_ioctl(struct file *fp, u_long com, void *data,
	struct ucred *active_cred, struct thread *td)
{
	return (ENOTTY);
}

/*ARGSUSED*/
static int
kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
	struct thread *td)
{
	struct kqueue *kq;
	int revents = 0;
	int s = splnet();

	kq = fp->f_data;
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(td, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	splx(s);
	return (revents);
}

/*ARGSUSED*/
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
	struct thread *td)
{
	struct kqueue *kq;

	kq = fp->f_data;
	bzero((void *)st, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

/*ARGSUSED*/
static int
kqueue_close(struct file *fp, struct thread *td)
{
	struct kqueue *kq = fp->f_data;
	struct filedesc *fdp = kq->kq_fdp;
	struct knote **knp, *kn, *kn0;
	int i;

	GIANT_REQUIRED;

	FILEDESC_LOCK(fdp);
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
		kn = *knp;
		while (kn != NULL) {
			kn0 = SLIST_NEXT(kn, kn_link);
			if (kq == kn->kn_kq) {
				kn->kn_fop->f_detach(kn);
				*knp = kn0;
				FILE_LOCK(kn->kn_fp);
				FILEDESC_UNLOCK(fdp);
				fdrop_locked(kn->kn_fp, td);
				knote_free(kn);
				FILEDESC_LOCK(fdp);
			} else {
				knp = &SLIST_NEXT(kn, kn_link);
			}
			kn = kn0;
		}
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
			kn = *knp;
			while (kn != NULL) {
				kn0 = SLIST_NEXT(kn, kn_link);
				if (kq == kn->kn_kq) {
					kn->kn_fop->f_detach(kn);
					*knp = kn0;
					/* XXX non-fd release of kn->kn_ptr */
					FILEDESC_UNLOCK(fdp);
					knote_free(kn);
					FILEDESC_LOCK(fdp);
				} else {
					knp = &SLIST_NEXT(kn, kn_link);
				}
				kn = kn0;
			}
		}
	}
	FILEDESC_UNLOCK(fdp);
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeuppri(&kq->kq_sel, PSOCK);
	}
	free(kq, M_KQUEUE);
	fp->f_data = NULL;

	return (0);
}

static void
kqueue_wakeup(struct kqueue *kq)
{

	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeuppri(&kq->kq_sel, PSOCK);
	}
	KNOTE(&kq->kq_sel.si_note, 0);
}
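/*
 * Illustrative sketch (not part of the original file): event sources
 * deliver activity to knote() below through the KNOTE() macro, exactly
 * as kqueue_wakeup() does above for nested kqueues.  A hypothetical
 * driver keeping a struct selinfo in its softc would post readability
 * from its receive path like so:
 *
 *	struct mydev_softc {
 *		struct selinfo	sc_rsel;
 *		...
 *	};
 *
 *	static void
 *	mydev_rxintr(struct mydev_softc *sc)
 *	{
 *		...
 *		KNOTE(&sc->sc_rsel.si_note, 0);
 *	}
 */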
/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext)
		if (kn->kn_fop->f_event(kn, hint))
			KNOTE_ACTIVATE(kn);
}

/*
 * remove all knotes from a specified klist
 */
void
knote_remove(struct thread *td, struct klist *list)
{
	struct knote *kn;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct thread *td, int fd)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct klist *list;

	FILEDESC_LOCK(fdp);
	list = &fdp->fd_knlist[fd];
	FILEDESC_UNLOCK(fdp);
	knote_remove(td, list);
}

static void
knote_attach(struct knote *kn, struct filedesc *fdp)
{
	struct klist *list, *tmp_knhash;
	u_long tmp_knhashmask;
	int size;

	FILEDESC_LOCK(fdp);

	if (! kn->kn_fop->f_isfd) {
		if (fdp->fd_knhashmask == 0) {
			FILEDESC_UNLOCK(fdp);
			tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
			    &tmp_knhashmask);
			FILEDESC_LOCK(fdp);
			if (fdp->fd_knhashmask == 0) {
				fdp->fd_knhash = tmp_knhash;
				fdp->fd_knhashmask = tmp_knhashmask;
			} else {
				free(tmp_knhash, M_KQUEUE);
			}
		}
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
		goto done;
	}

	if (fdp->fd_knlistsize <= kn->kn_id) {
		size = fdp->fd_knlistsize;
		while (size <= kn->kn_id)
			size += KQEXTENT;
		FILEDESC_UNLOCK(fdp);
		MALLOC(list, struct klist *,
		    size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
		FILEDESC_LOCK(fdp);
		if (fdp->fd_knlistsize > kn->kn_id) {
			FREE(list, M_KQUEUE);
			goto bigenough;
		}
		if (fdp->fd_knlist != NULL) {
			bcopy(fdp->fd_knlist, list,
			    fdp->fd_knlistsize * sizeof(struct klist *));
			FREE(fdp->fd_knlist, M_KQUEUE);
		}
		bzero((caddr_t)list +
		    fdp->fd_knlistsize * sizeof(struct klist *),
		    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
		fdp->fd_knlistsize = size;
		fdp->fd_knlist = list;
	}
bigenough:
	list = &fdp->fd_knlist[kn->kn_id];
done:
	FILEDESC_UNLOCK(fdp);
	SLIST_INSERT_HEAD(list, kn, kn_link);
	kn->kn_status = 0;
}
/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling fdrop and free.
 */
static void
knote_drop(struct knote *kn, struct thread *td)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct klist *list;

	FILEDESC_LOCK(fdp);
	if (kn->kn_fop->f_isfd)
		list = &fdp->fd_knlist[kn->kn_id];
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
	if (kn->kn_fop->f_isfd)
		FILE_LOCK(kn->kn_fp);
	FILEDESC_UNLOCK(fdp);

	SLIST_REMOVE(list, kn, knote, kn_link);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_fop->f_isfd)
		fdrop_locked(kn->kn_fp, td);
	knote_free(kn);
}

static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));

	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	splx(s);
	kqueue_wakeup(kq);
}

static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));

	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	splx(s);
}

static void
knote_init(void)
{
	knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

static struct knote *
knote_alloc(void)
{
	return ((struct knote *)uma_zalloc(knote_zone, M_WAITOK));
}

static void
knote_free(struct knote *kn)
{
	uma_zfree(knote_zone, kn);
}
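/*
 * Illustrative sketch (not part of the original file): a complete,
 * minimal userland consumer of the interfaces implemented above,
 * watching standard input for readability; the read filter reports the
 * number of readable bytes in kev.data.  This is userland code and does
 * not belong in the kernel build:
 *
 *	#include <sys/types.h>
 *	#include <sys/event.h>
 *	#include <sys/time.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct kevent kev;
 *		int kq, n;
 *
 *		if ((kq = kqueue()) == -1)
 *			err(1, "kqueue");
 *		EV_SET(&kev, STDIN_FILENO, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *		if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
 *			err(1, "kevent register");
 *		n = kevent(kq, NULL, 0, &kev, 1, NULL);
 *		if (n == -1)
 *			err(1, "kevent wait");
 *		if (n > 0)
 *			printf("%ld bytes readable\n", (long)kev.data);
 *		return (0);
 *	}
 */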