/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/uio.h>

#include <vm/uma.h>

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

static int	kqueue_scan(struct file *fp, int maxevents,
		    struct kevent *ulistp, const struct timespec *timeout,
		    struct thread *td);
static void	kqueue_wakeup(struct kqueue *kq);

static fo_rdwr_t	kqueue_read;
static fo_rdwr_t	kqueue_write;
static fo_ioctl_t	kqueue_ioctl;
static fo_poll_t	kqueue_poll;
static fo_kqfilter_t	kqueue_kqfilter;
static fo_stat_t	kqueue_stat;
static fo_close_t	kqueue_close;

static struct fileops kqueueops = {
	kqueue_read,
	kqueue_write,
	kqueue_ioctl,
	kqueue_poll,
	kqueue_kqfilter,
	kqueue_stat,
	kqueue_close
};

static void	knote_attach(struct knote *kn, struct filedesc *fdp);
static void	knote_drop(struct knote *kn, struct thread *td);
static void	knote_enqueue(struct knote *kn);
static void	knote_dequeue(struct knote *kn);
static void	knote_init(void);
static struct	knote *knote_alloc(void);
static void	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);
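
/*
 * Note on the filterops initializers below: the fields are positional,
 * in the order { f_isfd, f_attach, f_detach, f_event } from the
 * struct filterops definition in <sys/event.h>.  f_isfd is nonzero for
 * filters whose ident is a file descriptor.
 */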

static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static uma_zone_t	knote_zone;
static int		kq_ncallouts = 0;
static int		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");

#define KNOTE_ACTIVATE(kn) do {						\
	kn->kn_status |= KN_ACTIVE;					\
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue(kn);					\
} while(0)

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

static int
filt_nullattach(struct knote *kn)
{

	return (ENXIO);
};

struct filterops null_filtops =
	{ 0, filt_nullattach, NULL, NULL };

extern struct filterops sig_filtops;

/*
 * Table for all system-defined filters.  The EVFILT_* constants are
 * small negative numbers, so ~filt is used elsewhere to turn them into
 * zero-based indices into this table.
 */
static struct filterops *sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	&null_filtops,			/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
	&file_filtops,			/* EVFILT_NETDEV */
};

static int
filt_fileattach(struct knote *kn)
{

	return (fo_kqfilter(kn->kn_fp, kn));
}

/*ARGSUSED*/
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	kn->kn_fop = &kqread_filtops;
	SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
	return (0);
}

static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}
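
/*
 * EVFILT_PROC filter.  The knote is linked onto the target process's
 * p_klist, and the process code delivers NOTE_EXIT, NOTE_FORK, etc.
 * as hints through KNOTE() on that list.
 */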

static int
filt_procattach(struct knote *kn)
{
	struct proc *p;
	int error;

	/* pfind() returns the matching process locked. */
	p = pfind(kn->kn_id);
	if (p == NULL)
		return (ESRCH);
	if ((error = p_cansee(curthread, p))) {
		PROC_UNLOCK(p);
		return (error);
	}

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;		/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);
	PROC_UNLOCK(p);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p = kn->kn_ptr.p_proc;

	if (kn->kn_status & KN_DETACHED)
		return;

	PROC_LOCK(p);
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
	PROC_UNLOCK(p);
}

static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.
	 */
	if (event == NOTE_EXIT) {
		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tv.tv_sec = kn->kn_sdata / 1000;
		tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
		tticks = tvtohz(&tv);
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset(calloutp, tticks, filt_timerexpire, kn);
	}
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	if (kq_ncallouts >= kq_calloutmax)
		return (ENOMEM);
	kq_ncallouts++;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	MALLOC(calloutp, struct callout *, sizeof(*calloutp),
	    M_KQUEUE, 0);
	callout_init(calloutp, 0);
	callout_reset(calloutp, tticks, filt_timerexpire, kn);
	kn->kn_hook = calloutp;

	return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	callout_stop(calloutp);
	FREE(calloutp, M_KQUEUE);
	kq_ncallouts--;
}

static int
filt_timer(struct knote *kn, long hint)
{

	return (kn->kn_data != 0);
}

/*
 * MPSAFE
 */
int
kqueue(struct thread *td, struct kqueue_args *uap)
{
	struct filedesc *fdp;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	mtx_lock(&Giant);
	fdp = td->td_proc->p_fd;
	error = falloc(td, &fp, &fd);
	if (error)
		goto done2;
	kq = malloc(sizeof(struct kqueue), M_KQUEUE, M_ZERO);
	TAILQ_INIT(&kq->kq_head);
	FILE_LOCK(fp);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	TAILQ_INIT(&kq->kq_head);
	fp->f_data = kq;
	FILE_UNLOCK(fp);
	FILEDESC_LOCK(fdp);
	td->td_retval[0] = fd;
	if (fdp->fd_knlistsize < 0)
		fdp->fd_knlistsize = 0;		/* this process has a kq */
	FILEDESC_UNLOCK(fdp);
	kq->kq_fdp = fdp;
done2:
	mtx_unlock(&Giant);
	return (error);
}
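
/*
 * Illustrative userland usage of the interface implemented here (a
 * sketch only, not part of this file): register interest in a
 * descriptor becoming readable, then wait for one event.
 *
 *	int kq = kqueue();
 *	struct kevent ev;
 *
 *	EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);	(register the change)
 *	kevent(kq, NULL, 0, &ev, 1, NULL);	(block for one event)
 */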

#ifndef _SYS_SYSPROTO_H_
struct kevent_args {
	int	fd;
	const struct kevent *changelist;
	int	nchanges;
	struct	kevent *eventlist;
	int	nevents;
	const struct timespec *timeout;
};
#endif
/*
 * MPSAFE
 */
int
kevent(struct thread *td, struct kevent_args *uap)
{
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	int i, n, nerrors, error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_type != DTYPE_KQUEUE) {
		fdrop(fp, td);
		return (EBADF);
	}
	if (uap->timeout != NULL) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			goto done_nogiant;
		uap->timeout = &ts;
	}
	mtx_lock(&Giant);

	kq = fp->f_data;
	nerrors = 0;

	while (uap->nchanges > 0) {
		n = uap->nchanges > KQ_NEVENTS ? KQ_NEVENTS : uap->nchanges;
		error = copyin(uap->changelist, kq->kq_kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kq->kq_kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, td);
			if (error) {
				if (uap->nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					(void) copyout(kevp,
					    uap->eventlist,
					    sizeof(*kevp));
					uap->eventlist++;
					uap->nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		uap->nchanges -= n;
		uap->changelist += n;
	}
	if (nerrors) {
		td->td_retval[0] = nerrors;
		error = 0;
		goto done;
	}

	error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, td);
done:
	mtx_unlock(&Giant);
done_nogiant:
	if (fp != NULL)
		fdrop(fp, td);
	return (error);
}

int
kqueue_add_filteropts(int filt, struct filterops *filtops)
{

	if (filt > 0)
		panic("filt(%d) > 0", filt);
	if (filt + EVFILT_SYSCOUNT < 0)
		panic("filt(%d) + EVFILT_SYSCOUNT(%d) == %d < 0",
		    filt, EVFILT_SYSCOUNT, filt + EVFILT_SYSCOUNT);
	if (sysfilt_ops[~filt] != &null_filtops)
		panic("sysfilt_ops[~filt(%d)] != &null_filtops", filt);
	sysfilt_ops[~filt] = filtops;
	return (0);
}

int
kqueue_del_filteropts(int filt)
{

	if (filt > 0)
		panic("filt(%d) > 0", filt);
	if (filt + EVFILT_SYSCOUNT < 0)
		panic("filt(%d) + EVFILT_SYSCOUNT(%d) == %d < 0",
		    filt, EVFILT_SYSCOUNT, filt + EVFILT_SYSCOUNT);
	if (sysfilt_ops[~filt] == &null_filtops)
		panic("sysfilt_ops[~filt(%d)] == &null_filtops", filt);
	sysfilt_ops[~filt] = &null_filtops;
	return (0);
}
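
/*
 * Apply a single change record to the kqueue: EV_ADD attaches (or
 * updates) a knote, EV_DELETE detaches and drops it, and EV_ENABLE /
 * EV_DISABLE toggle delivery.  Called from kevent() with Giant held,
 * and from filt_proc() when a NOTE_TRACK fork event registers a knote
 * on the child process.
 */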

int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td)
{
	struct filedesc *fdp = kq->kq_fdp;
	struct filterops *fops;
	struct file *fp = NULL;
	struct knote *kn = NULL;
	int s, error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	} else {
		/*
		 * XXX
		 * filter attach routine is responsible for insuring that
		 * the identifier can be attached to it.
		 */
		printf("unknown filter: %d\n", kev->filter);
		return (EINVAL);
	}

	FILEDESC_LOCK(fdp);
	if (fops->f_isfd) {
		/* validate descriptor */
		if ((u_int)kev->ident >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[kev->ident]) == NULL) {
			FILEDESC_UNLOCK(fdp);
			return (EBADF);
		}
		fhold(fp);

		if (kev->ident < fdp->fd_knlistsize) {
			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	} else {
		if (fdp->fd_knhashmask != 0) {
			struct klist *list;

			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}
	FILEDESC_UNLOCK(fdp);

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {

		if (kn == NULL) {
			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn, fdp);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn, td);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filter which has already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		s = splhigh();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		splx(s);

	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		s = splhigh();
		kn->kn_status |= KN_DISABLED;
		splx(s);
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		s = splhigh();
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		splx(s);
	}

done:
	if (fp != NULL)
		fdrop(fp, td);
	return (error);
}
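
/*
 * Collect up to maxevents triggered events and copy them out to the
 * user's buffer.  A NULL tsp blocks indefinitely, while a zero-valued
 * timespec polls without sleeping.  An on-stack marker knote is queued
 * at the tail of kq_head so the scan stops after examining every knote
 * that was queued when the pass began; knotes that remain active are
 * re-queued behind the marker.
 */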

static int
kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp,
	const struct timespec *tsp, struct thread *td)
{
	struct kqueue *kq;
	struct kevent *kevp;
	struct timeval atv, rtv, ttv;
	struct knote *kn, marker;
	int s, count, timeout, nkev = 0, error = 0;

	FILE_LOCK_ASSERT(fp, MA_NOTOWNED);

	kq = fp->f_data;
	count = maxevents;
	if (count == 0)
		goto done;

	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			timeout = -1;
		else
			timeout = atv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&atv);
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
		timeout = 0;
	}
	goto start;

retry:
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		timeout = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}

start:
	kevp = kq->kq_kev;
	s = splhigh();
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			kq->kq_state |= KQ_SLEEP;
			error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout);
		}
		splx(s);
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
	while (count) {
		kn = TAILQ_FIRST(&kq->kq_head);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if (kn == &marker) {
			splx(s);
			if (count == maxevents)
				goto retry;
			goto done;
		}
		if (kn->kn_status & KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
			continue;
		}
		*kevp = kn->kn_kevent;
		kevp++;
		nkev++;
		if (kn->kn_flags & EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			splx(s);
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			s = splhigh();
		} else if (kn->kn_flags & EV_CLEAR) {
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
		} else {
			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		}
		count--;
		if (nkev == KQ_NEVENTS) {
			splx(s);
			error = copyout(&kq->kq_kev, ulistp,
			    sizeof(struct kevent) * nkev);
			ulistp += nkev;
			nkev = 0;
			kevp = kq->kq_kev;
			s = splhigh();
			if (error)
				break;
		}
	}
	TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
	splx(s);
done:
	if (nkev != 0)
		error = copyout(&kq->kq_kev, ulistp,
		    sizeof(struct kevent) * nkev);
	td->td_retval[0] = maxevents - count;
	return (error);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_ioctl(struct file *fp, u_long com, void *data,
	struct ucred *active_cred, struct thread *td)
{
	return (ENOTTY);
}

/*ARGSUSED*/
static int
kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
	struct thread *td)
{
	struct kqueue *kq;
	int revents = 0;
	int s = splnet();

	kq = fp->f_data;
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(td, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	splx(s);
	return (revents);
}

/*ARGSUSED*/
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
	struct thread *td)
{
	struct kqueue *kq;

	kq = fp->f_data;
	bzero((void *)st, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

/*ARGSUSED*/
static int
kqueue_close(struct file *fp, struct thread *td)
{
	struct kqueue *kq = fp->f_data;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct knote **knp, *kn, *kn0;
	int i;

	FILEDESC_LOCK(fdp);
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
		kn = *knp;
		while (kn != NULL) {
			kn0 = SLIST_NEXT(kn, kn_link);
			if (kq == kn->kn_kq) {
				kn->kn_fop->f_detach(kn);
				*knp = kn0;
				FILE_LOCK(kn->kn_fp);
				FILEDESC_UNLOCK(fdp);
				fdrop_locked(kn->kn_fp, td);
				knote_free(kn);
				FILEDESC_LOCK(fdp);
			} else {
				knp = &SLIST_NEXT(kn, kn_link);
			}
			kn = kn0;
		}
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
			kn = *knp;
			while (kn != NULL) {
				kn0 = SLIST_NEXT(kn, kn_link);
				if (kq == kn->kn_kq) {
					kn->kn_fop->f_detach(kn);
					*knp = kn0;
					/* XXX non-fd release of kn->kn_ptr */
					FILEDESC_UNLOCK(fdp);
					knote_free(kn);
					FILEDESC_LOCK(fdp);
				} else {
					knp = &SLIST_NEXT(kn, kn_link);
				}
				kn = kn0;
			}
		}
	}
	FILEDESC_UNLOCK(fdp);
	free(kq, M_KQUEUE);
	fp->f_data = NULL;

	return (0);
}

static void
kqueue_wakeup(struct kqueue *kq)
{

	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);
	}
	KNOTE(&kq->kq_sel.si_note, 0);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext)
		if (kn->kn_fop->f_event(kn, hint))
			KNOTE_ACTIVATE(kn);
}

/*
 * remove all knotes from a specified klist
 */
void
knote_remove(struct thread *td, struct klist *list)
{
	struct knote *kn;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct thread *td, int fd)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct klist *list;

	FILEDESC_LOCK(fdp);
	list = &fdp->fd_knlist[fd];
	FILEDESC_UNLOCK(fdp);
	knote_remove(td, list);
}
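
/*
 * Link the knote into either the per-descriptor knlist or the id hash
 * of the file descriptor table.  The table growth and hash creation
 * below drop the filedesc lock around the allocation and then recheck,
 * discarding the new memory if another thread got there first.
 */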

static void
knote_attach(struct knote *kn, struct filedesc *fdp)
{
	struct klist *list, *tmp_knhash;
	u_long tmp_knhashmask;
	int size;

	FILEDESC_LOCK(fdp);

	if (! kn->kn_fop->f_isfd) {
		if (fdp->fd_knhashmask == 0) {
			FILEDESC_UNLOCK(fdp);
			tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
			    &tmp_knhashmask);
			FILEDESC_LOCK(fdp);
			if (fdp->fd_knhashmask == 0) {
				fdp->fd_knhash = tmp_knhash;
				fdp->fd_knhashmask = tmp_knhashmask;
			} else {
				free(tmp_knhash, M_KQUEUE);
			}
		}
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
		goto done;
	}

	if (fdp->fd_knlistsize <= kn->kn_id) {
		size = fdp->fd_knlistsize;
		while (size <= kn->kn_id)
			size += KQEXTENT;
		FILEDESC_UNLOCK(fdp);
		MALLOC(list, struct klist *,
		    size * sizeof(struct klist *), M_KQUEUE, 0);
		FILEDESC_LOCK(fdp);
		if (fdp->fd_knlistsize > kn->kn_id) {
			FREE(list, M_KQUEUE);
			goto bigenough;
		}
		if (fdp->fd_knlist != NULL) {
			bcopy(fdp->fd_knlist, list,
			    fdp->fd_knlistsize * sizeof(struct klist *));
			FREE(fdp->fd_knlist, M_KQUEUE);
		}
		bzero((caddr_t)list +
		    fdp->fd_knlistsize * sizeof(struct klist *),
		    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
		fdp->fd_knlistsize = size;
		fdp->fd_knlist = list;
	}
bigenough:
	list = &fdp->fd_knlist[kn->kn_id];
done:
	FILEDESC_UNLOCK(fdp);
	SLIST_INSERT_HEAD(list, kn, kn_link);
	kn->kn_status = 0;
}

/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling fdrop and free.
 */
static void
knote_drop(struct knote *kn, struct thread *td)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct klist *list;

	FILEDESC_LOCK(fdp);
	if (kn->kn_fop->f_isfd)
		list = &fdp->fd_knlist[kn->kn_id];
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
	if (kn->kn_fop->f_isfd)
		FILE_LOCK(kn->kn_fp);
	FILEDESC_UNLOCK(fdp);

	SLIST_REMOVE(list, kn, knote, kn_link);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_fop->f_isfd)
		fdrop_locked(kn->kn_fp, td);
	knote_free(kn);
}

static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));

	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	splx(s);
	kqueue_wakeup(kq);
}

static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));

	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	splx(s);
}

static void
knote_init(void)
{
	knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

static struct knote *
knote_alloc(void)
{
	return ((struct knote *)uma_zalloc(knote_zone, 0));
}

static void
knote_free(struct knote *kn)
{
	uma_zfree(knote_zone, kn);
}