/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/uio.h>

#include <vm/uma.h>

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

static int	kqueue_scan(struct file *fp, int maxevents,
		    struct kevent *ulistp, const struct timespec *timeout,
		    struct thread *td);
static void	kqueue_wakeup(struct kqueue *kq);

static fo_rdwr_t	kqueue_read;
static fo_rdwr_t	kqueue_write;
static fo_ioctl_t	kqueue_ioctl;
static fo_poll_t	kqueue_poll;
static fo_kqfilter_t	kqueue_kqfilter;
static fo_stat_t	kqueue_stat;
static fo_close_t	kqueue_close;

static struct fileops kqueueops = {
	kqueue_read,
	kqueue_write,
	kqueue_ioctl,
	kqueue_poll,
	kqueue_kqfilter,
	kqueue_stat,
	kqueue_close,
	0
};

static void	knote_attach(struct knote *kn, struct filedesc *fdp);
static void	knote_drop(struct knote *kn, struct thread *td);
static void	knote_enqueue(struct knote *kn);
static void	knote_dequeue(struct knote *kn);
static void	knote_init(void);
static struct knote *knote_alloc(void);
static void	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);

static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static uma_zone_t	knote_zone;
static int		kq_ncallouts = 0;
static int		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");

#define KNOTE_ACTIVATE(kn) do {					\
	kn->kn_status |= KN_ACTIVE;					\
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue(kn);					\
} while(0)

#define	KN_HASHSIZE	64		/* XXX should be tunable */
#define	KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

static int
filt_nullattach(struct knote *kn)
{

	return (ENXIO);
}

struct filterops null_filtops =
	{ 0, filt_nullattach, NULL, NULL };

extern struct filterops sig_filtops;

/*
 * Table for all system-defined filters.
 */
static struct filterops *sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	&null_filtops,			/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
	&file_filtops,			/* EVFILT_NETDEV */
};

static int
filt_fileattach(struct knote *kn)
{

	return (fo_kqfilter(kn->kn_fp, kn));
}

/*ARGSUSED*/
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (1);

	kn->kn_fop = &kqread_filtops;
	SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
	return (0);
}

static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

static int
filt_procattach(struct knote *kn)
{
	struct proc *p;
	int error;

	p = pfind(kn->kn_id);
	if (p == NULL)
		return (ESRCH);
	if ((error = p_cansee(curthread, p))) {
		PROC_UNLOCK(p);
		return (error);
	}

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);
	PROC_UNLOCK(p);

	return (0);
}
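
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * the proc filter above is normally reached from userland through
 * kevent(2), for example to watch a pid for exit and to follow any
 * processes it forks:
 *
 *	struct kevent kev;
 *	EV_SET(&kev, pid, EVFILT_PROC, EV_ADD,
 *	    NOTE_EXIT | NOTE_FORK | NOTE_TRACK, 0, NULL);
 *	(void) kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * The NOTE_TRACK handling itself lives in filt_proc() below; "pid" and
 * "kq" here are hypothetical userland variables.
 */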

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p = kn->kn_ptr.p_proc;

	if (kn->kn_status & KN_DETACHED)
		return;

	PROC_LOCK(p);
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
	PROC_UNLOCK(p);
}

static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.
	 */
	if (event == NOTE_EXIT) {
		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tv.tv_sec = kn->kn_sdata / 1000;
		tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
		tticks = tvtohz(&tv);
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset(calloutp, tticks, filt_timerexpire, kn);
	}
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	if (kq_ncallouts >= kq_calloutmax)
		return (ENOMEM);
	kq_ncallouts++;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	MALLOC(calloutp, struct callout *, sizeof(*calloutp),
	    M_KQUEUE, M_WAITOK);
	callout_init(calloutp, 0);
	callout_reset(calloutp, tticks, filt_timerexpire, kn);
	kn->kn_hook = calloutp;

	return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	callout_stop(calloutp);
	FREE(calloutp, M_KQUEUE);
	kq_ncallouts--;
}

static int
filt_timer(struct knote *kn, long hint)
{

	return (kn->kn_data != 0);
}
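
/*
 * Worked example (added commentary, illustrative only) of the millisecond
 * conversion used by filt_timerattach() and filt_timerexpire() above:
 * with kn_sdata == 1500, tv_sec = 1500 / 1000 = 1 and
 * tv_usec = (1500 % 1000) * 1000 = 500000, so tvtohz() converts the
 * resulting 1.5 second timeval into callout ticks for the running
 * kernel's hz.
 */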

/*
 * MPSAFE
 */
int
kqueue(struct thread *td, struct kqueue_args *uap)
{
	struct filedesc *fdp;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	mtx_lock(&Giant);
	fdp = td->td_proc->p_fd;
	error = falloc(td, &fp, &fd);
	if (error)
		goto done2;
	kq = malloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO);
	TAILQ_INIT(&kq->kq_head);
	FILE_LOCK(fp);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	TAILQ_INIT(&kq->kq_head);
	fp->f_data = kq;
	FILE_UNLOCK(fp);
	FILEDESC_LOCK(fdp);
	td->td_retval[0] = fd;
	if (fdp->fd_knlistsize < 0)
		fdp->fd_knlistsize = 0;		/* this process has a kq */
	FILEDESC_UNLOCK(fdp);
	kq->kq_fdp = fdp;
done2:
	mtx_unlock(&Giant);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct kevent_args {
	int	fd;
	const struct kevent *changelist;
	int	nchanges;
	struct	kevent *eventlist;
	int	nevents;
	const struct timespec *timeout;
};
#endif
/*
 * MPSAFE
 */
int
kevent(struct thread *td, struct kevent_args *uap)
{
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	int i, n, nerrors, error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_type != DTYPE_KQUEUE) {
		fdrop(fp, td);
		return (EBADF);
	}
	if (uap->timeout != NULL) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			goto done_nogiant;
		uap->timeout = &ts;
	}
	mtx_lock(&Giant);

	kq = fp->f_data;
	nerrors = 0;

	while (uap->nchanges > 0) {
		n = uap->nchanges > KQ_NEVENTS ? KQ_NEVENTS : uap->nchanges;
		error = copyin(uap->changelist, kq->kq_kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kq->kq_kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, td);
			if (error) {
				if (uap->nevents != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					(void) copyout(kevp,
					    uap->eventlist,
					    sizeof(*kevp));
					uap->eventlist++;
					uap->nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		uap->nchanges -= n;
		uap->changelist += n;
	}
	if (nerrors) {
		td->td_retval[0] = nerrors;
		error = 0;
		goto done;
	}

	error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, td);
done:
	mtx_unlock(&Giant);
done_nogiant:
	if (fp != NULL)
		fdrop(fp, td);
	return (error);
}

int
kqueue_add_filteropts(int filt, struct filterops *filtops)
{

	if (filt > 0)
		panic("filt(%d) > 0", filt);
	if (filt + EVFILT_SYSCOUNT < 0)
		panic("filt(%d) + EVFILT_SYSCOUNT(%d) == %d < 0",
		    filt, EVFILT_SYSCOUNT, filt + EVFILT_SYSCOUNT);
	if (sysfilt_ops[~filt] != &null_filtops)
		panic("sysfilt_ops[~filt(%d)] != &null_filtops", filt);
	sysfilt_ops[~filt] = filtops;
	return (0);
}

int
kqueue_del_filteropts(int filt)
{

	if (filt > 0)
		panic("filt(%d) > 0", filt);
	if (filt + EVFILT_SYSCOUNT < 0)
		panic("filt(%d) + EVFILT_SYSCOUNT(%d) == %d < 0",
		    filt, EVFILT_SYSCOUNT, filt + EVFILT_SYSCOUNT);
	if (sysfilt_ops[~filt] == &null_filtops)
		panic("sysfilt_ops[~filt(%d)] == &null_filtops", filt);
	sysfilt_ops[~filt] = &null_filtops;
	return (0);
}
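
/*
 * Note on the ~filt indexing used above and in kqueue_register() below
 * (added commentary): system filters are small negative numbers, so the
 * bitwise complement maps them onto sysfilt_ops[] indices, e.g.
 * EVFILT_READ (-1) -> 0, EVFILT_WRITE (-2) -> 1, and so on.
 */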

int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td)
{
	struct filedesc *fdp = kq->kq_fdp;
	struct filterops *fops;
	struct file *fp = NULL;
	struct knote *kn = NULL;
	int s, error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	} else {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		printf("unknown filter: %d\n", kev->filter);
		return (EINVAL);
	}

	FILEDESC_LOCK(fdp);
	if (fops->f_isfd) {
		/* validate descriptor */
		if ((u_int)kev->ident >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[kev->ident]) == NULL) {
			FILEDESC_UNLOCK(fdp);
			return (EBADF);
		}
		fhold(fp);

		if (kev->ident < fdp->fd_knlistsize) {
			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	} else {
		if (fdp->fd_knhashmask != 0) {
			struct klist *list;

			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}
	FILEDESC_UNLOCK(fdp);

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {

		if (kn == NULL) {
			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn, fdp);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn, td);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filter which has already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		s = splhigh();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		splx(s);

	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		s = splhigh();
		kn->kn_status |= KN_DISABLED;
		splx(s);
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		s = splhigh();
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		splx(s);
	}

done:
	if (fp != NULL)
		fdrop(fp, td);
	return (error);
}
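
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * each change passed to kevent(2) arrives at kqueue_register() above.
 * A userland caller might drive the EV_ADD and EV_DELETE paths like so,
 * where "fd" is a hypothetical descriptor being monitored:
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
 *	(void) kevent(kq, &kev, 1, NULL, 0, NULL);
 *	EV_SET(&kev, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
 *	(void) kevent(kq, &kev, 1, NULL, 0, NULL);
 */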

static int
kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp,
	const struct timespec *tsp, struct thread *td)
{
	struct kqueue *kq;
	struct kevent *kevp;
	struct timeval atv, rtv, ttv;
	struct knote *kn, marker;
	int s, count, timeout, nkev = 0, error = 0;

	FILE_LOCK_ASSERT(fp, MA_NOTOWNED);

	kq = fp->f_data;
	count = maxevents;
	if (count == 0)
		goto done;

	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			timeout = -1;
		else
			timeout = atv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&atv);
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
		timeout = 0;
	}
	goto start;

retry:
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		timeout = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}

start:
	kevp = kq->kq_kev;
	s = splhigh();
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			kq->kq_state |= KQ_SLEEP;
			error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout);
		}
		splx(s);
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
	while (count) {
		kn = TAILQ_FIRST(&kq->kq_head);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if (kn == &marker) {
			splx(s);
			if (count == maxevents)
				goto retry;
			goto done;
		}
		if (kn->kn_status & KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
			continue;
		}
		*kevp = kn->kn_kevent;
		kevp++;
		nkev++;
		if (kn->kn_flags & EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			splx(s);
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			s = splhigh();
		} else if (kn->kn_flags & EV_CLEAR) {
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
		} else {
			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		}
		count--;
		if (nkev == KQ_NEVENTS) {
			splx(s);
			error = copyout(&kq->kq_kev, ulistp,
			    sizeof(struct kevent) * nkev);
			ulistp += nkev;
			nkev = 0;
			kevp = kq->kq_kev;
			s = splhigh();
			if (error)
				break;
		}
	}
	TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
	splx(s);
done:
	if (nkev != 0)
		error = copyout(&kq->kq_kev, ulistp,
		    sizeof(struct kevent) * nkev);
	td->td_retval[0] = maxevents - count;
	return (error);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{
	return (ENXIO);
}

/*ARGSUSED*/
static int
kqueue_ioctl(struct file *fp, u_long com, void *data,
	struct ucred *active_cred, struct thread *td)
{
	return (ENOTTY);
}

/*ARGSUSED*/
static int
kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
	struct thread *td)
{
	struct kqueue *kq;
	int revents = 0;
	int s = splnet();

	kq = fp->f_data;
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(td, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	splx(s);
	return (revents);
}

/*ARGSUSED*/
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
	struct thread *td)
{
	struct kqueue *kq;

	kq = fp->f_data;
	bzero((void *)st, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

/*ARGSUSED*/
static int
kqueue_close(struct file *fp, struct thread *td)
{
	struct kqueue *kq = fp->f_data;
	struct filedesc *fdp = kq->kq_fdp;
	struct knote **knp, *kn, *kn0;
	int i;

	FILEDESC_LOCK(fdp);
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
		kn = *knp;
		while (kn != NULL) {
			kn0 = SLIST_NEXT(kn, kn_link);
			if (kq == kn->kn_kq) {
				kn->kn_fop->f_detach(kn);
				*knp = kn0;
				FILE_LOCK(kn->kn_fp);
				FILEDESC_UNLOCK(fdp);
				fdrop_locked(kn->kn_fp, td);
				knote_free(kn);
				FILEDESC_LOCK(fdp);
			} else {
				knp = &SLIST_NEXT(kn, kn_link);
			}
			kn = kn0;
		}
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
			kn = *knp;
			while (kn != NULL) {
				kn0 = SLIST_NEXT(kn, kn_link);
				if (kq == kn->kn_kq) {
					kn->kn_fop->f_detach(kn);
					*knp = kn0;
					/* XXX non-fd release of kn->kn_ptr */
					FILEDESC_UNLOCK(fdp);
					knote_free(kn);
					FILEDESC_LOCK(fdp);
				} else {
					knp = &SLIST_NEXT(kn, kn_link);
				}
				kn = kn0;
			}
		}
	}
	FILEDESC_UNLOCK(fdp);
	free(kq, M_KQUEUE);
	fp->f_data = NULL;

	return (0);
}

static void
kqueue_wakeup(struct kqueue *kq)
{

	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);
	}
	KNOTE(&kq->kq_sel.si_note, 0);
}
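
/*
 * Added commentary: event sources elsewhere in the kernel deliver
 * activity to their klists through the KNOTE() macro, which expands to
 * a call to knote() below; for example, the socket layer calls
 * KNOTE(&sb->sb_sel.si_note, 0) from sowakeup() when buffer state
 * changes.
 */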

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext)
		if (kn->kn_fop->f_event(kn, hint))
			KNOTE_ACTIVATE(kn);
}

/*
 * remove all knotes from a specified klist
 */
void
knote_remove(struct thread *td, struct klist *list)
{
	struct knote *kn;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct thread *td, int fd)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct klist *list;

	FILEDESC_LOCK(fdp);
	list = &fdp->fd_knlist[fd];
	FILEDESC_UNLOCK(fdp);
	knote_remove(td, list);
}

static void
knote_attach(struct knote *kn, struct filedesc *fdp)
{
	struct klist *list, *tmp_knhash;
	u_long tmp_knhashmask;
	int size;

	FILEDESC_LOCK(fdp);

	if (! kn->kn_fop->f_isfd) {
		if (fdp->fd_knhashmask == 0) {
			FILEDESC_UNLOCK(fdp);
			tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
			    &tmp_knhashmask);
			FILEDESC_LOCK(fdp);
			if (fdp->fd_knhashmask == 0) {
				fdp->fd_knhash = tmp_knhash;
				fdp->fd_knhashmask = tmp_knhashmask;
			} else {
				free(tmp_knhash, M_KQUEUE);
			}
		}
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
		goto done;
	}

	if (fdp->fd_knlistsize <= kn->kn_id) {
		size = fdp->fd_knlistsize;
		while (size <= kn->kn_id)
			size += KQEXTENT;
		FILEDESC_UNLOCK(fdp);
		MALLOC(list, struct klist *,
		    size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
		FILEDESC_LOCK(fdp);
		if (fdp->fd_knlistsize > kn->kn_id) {
			FREE(list, M_KQUEUE);
			goto bigenough;
		}
		if (fdp->fd_knlist != NULL) {
			bcopy(fdp->fd_knlist, list,
			    fdp->fd_knlistsize * sizeof(struct klist *));
			FREE(fdp->fd_knlist, M_KQUEUE);
		}
		bzero((caddr_t)list +
		    fdp->fd_knlistsize * sizeof(struct klist *),
		    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
		fdp->fd_knlistsize = size;
		fdp->fd_knlist = list;
	}
bigenough:
	list = &fdp->fd_knlist[kn->kn_id];
done:
	FILEDESC_UNLOCK(fdp);
	SLIST_INSERT_HEAD(list, kn, kn_link);
	kn->kn_status = 0;
}

/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling fdrop and free.
 */
static void
knote_drop(struct knote *kn, struct thread *td)
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct klist *list;

	FILEDESC_LOCK(fdp);
	if (kn->kn_fop->f_isfd)
		list = &fdp->fd_knlist[kn->kn_id];
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
	if (kn->kn_fop->f_isfd)
		FILE_LOCK(kn->kn_fp);
	FILEDESC_UNLOCK(fdp);

	SLIST_REMOVE(list, kn, knote, kn_link);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_fop->f_isfd)
		fdrop_locked(kn->kn_fp, td);
	knote_free(kn);
}

static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));

	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	splx(s);
	kqueue_wakeup(kq);
}

static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	int s = splhigh();

	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));

	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	splx(s);
}

static void
knote_init(void)
{
	knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

static struct knote *
knote_alloc(void)
{
	return ((struct knote *)uma_zalloc(knote_zone, M_WAITOK));
}

static void
knote_free(struct knote *kn)
{
	uma_zfree(knote_zone, kn);
}