/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2025 Klara, Inc.
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/caprights.h>
#include <sys/counter.h>
#include <sys/dirent.h>
#define	EXTERR_CATEGORY	EXTERR_CAT_INOTIFY
#include <sys/exterrvar.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/inotify.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/ktrace.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/selinfo.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslimits.h>
#include <sys/sysproto.h>
#include <sys/tree.h>
#include <sys/user.h>
#include <sys/vnode.h>

uint32_t inotify_rename_cookie;

static SYSCTL_NODE(_vfs, OID_AUTO, inotify, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "inotify configuration");

static int inotify_max_queued_events = 16384;
SYSCTL_INT(_vfs_inotify, OID_AUTO, max_queued_events, CTLFLAG_RWTUN,
    &inotify_max_queued_events, 0,
    "Maximum number of events to queue on an inotify descriptor");

static int inotify_max_user_instances = 256;
SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_instances, CTLFLAG_RWTUN,
    &inotify_max_user_instances, 0,
    "Maximum number of inotify descriptors per user");

static int inotify_max_user_watches;
SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_watches, CTLFLAG_RWTUN,
    &inotify_max_user_watches, 0,
    "Maximum number of inotify watches per user");

static int inotify_max_watches;
SYSCTL_INT(_vfs_inotify, OID_AUTO, max_watches, CTLFLAG_RWTUN,
    &inotify_max_watches, 0,
    "Maximum number of inotify watches system-wide");

static int inotify_watches;
SYSCTL_INT(_vfs_inotify, OID_AUTO, watches, CTLFLAG_RD,
    &inotify_watches, 0,
    "Total number of inotify watches currently in use");

static int inotify_coalesce = 1;
SYSCTL_INT(_vfs_inotify, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &inotify_coalesce, 0,
    "Coalesce inotify events when possible");

static COUNTER_U64_DEFINE_EARLY(inotify_event_drops);
SYSCTL_COUNTER_U64(_vfs_inotify, OID_AUTO, event_drops, CTLFLAG_RD,
    &inotify_event_drops,
    "Number of inotify events dropped due to limits or allocation failures");
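
/*
 * Example (sketch): the writable knobs above (CTLFLAG_RWTUN) may be
 * inspected and adjusted with sysctl(8) or set as loader tunables,
 * e.g. (the value shown is arbitrary):
 *
 *	sysctl vfs.inotify.max_queued_events
 *	sysctl vfs.inotify.max_user_watches=65536
 */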

static fo_rdwr_t	inotify_read;
static fo_ioctl_t	inotify_ioctl;
static fo_poll_t	inotify_poll;
static fo_kqfilter_t	inotify_kqfilter;
static fo_stat_t	inotify_stat;
static fo_close_t	inotify_close;
static fo_fill_kinfo_t	inotify_fill_kinfo;

static const struct fileops inotifyfdops = {
	.fo_read = inotify_read,
	.fo_write = invfo_rdwr,
	.fo_truncate = invfo_truncate,
	.fo_ioctl = inotify_ioctl,
	.fo_poll = inotify_poll,
	.fo_kqfilter = inotify_kqfilter,
	.fo_stat = inotify_stat,
	.fo_close = inotify_close,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
	.fo_fill_kinfo = inotify_fill_kinfo,
	.fo_cmp = file_kcmp_generic,
	.fo_flags = DFLAG_PASSABLE,
};

static void	filt_inotifydetach(struct knote *kn);
static int	filt_inotifyevent(struct knote *kn, long hint);

static const struct filterops inotify_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_inotifydetach,
	.f_event = filt_inotifyevent,
};

static MALLOC_DEFINE(M_INOTIFY, "inotify", "inotify data structures");

struct inotify_record {
	STAILQ_ENTRY(inotify_record) link;
	struct inotify_event	ev;
};

static uint64_t inotify_ino = 1;

/*
 * On LP64 systems this occupies 64 bytes, so we don't get internal
 * fragmentation by allocating watches with malloc(9).  If the size changes,
 * consider using a UMA zone to improve memory efficiency.
 */
struct inotify_watch {
	struct inotify_softc *sc;	/* back-pointer */
	int		wd;		/* unique ID */
	uint32_t	mask;		/* event mask */
	struct vnode	*vp;		/* vnode being watched, refed */
	RB_ENTRY(inotify_watch) ilink;		/* inotify linkage */
	TAILQ_ENTRY(inotify_watch) vlink;	/* vnode linkage */
};

static void
inotify_init(void *arg __unused)
{
	/* Don't let a user hold too many vnodes. */
	inotify_max_user_watches = desiredvnodes / 3;
	/* Don't let the system hold too many vnodes. */
	inotify_max_watches = desiredvnodes / 2;
}
SYSINIT(inotify, SI_SUB_VFS, SI_ORDER_ANY, inotify_init, NULL);

static int
inotify_watch_cmp(const struct inotify_watch *a,
    const struct inotify_watch *b)
{
	if (a->wd < b->wd)
		return (-1);
	else if (a->wd > b->wd)
		return (1);
	else
		return (0);
}
RB_HEAD(inotify_watch_tree, inotify_watch);
RB_GENERATE_STATIC(inotify_watch_tree, inotify_watch, ilink, inotify_watch_cmp);

struct inotify_softc {
	struct mtx	lock;			/* serialize all softc writes */
	STAILQ_HEAD(, inotify_record) pending;	/* events waiting to be read */
	struct inotify_record overflow;		/* preallocated record */
	int		nextwatch;		/* next watch ID to try */
	int		npending;		/* number of pending events */
	size_t		nbpending;		/* bytes available to read */
	uint64_t	ino;			/* unique identifier */
	struct inotify_watch_tree watches;	/* active watches */
	struct selinfo	sel;			/* select/poll/kevent info */
	struct ucred	*cred;			/* credential ref */
};

static struct inotify_record *
inotify_dequeue(struct inotify_softc *sc)
{
	struct inotify_record *rec;

	mtx_assert(&sc->lock, MA_OWNED);
	KASSERT(!STAILQ_EMPTY(&sc->pending),
	    ("%s: queue for %p is empty", __func__, sc));

	rec = STAILQ_FIRST(&sc->pending);
	STAILQ_REMOVE_HEAD(&sc->pending, link);
	sc->npending--;
	sc->nbpending -= sizeof(rec->ev) + rec->ev.len;
	return (rec);
}

static void
inotify_enqueue(struct inotify_softc *sc, struct inotify_record *rec, bool head)
{
	mtx_assert(&sc->lock, MA_OWNED);

	if (head)
		STAILQ_INSERT_HEAD(&sc->pending, rec, link);
	else
		STAILQ_INSERT_TAIL(&sc->pending, rec, link);
	sc->npending++;
	sc->nbpending += sizeof(rec->ev) + rec->ev.len;
}
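
/*
 * Events are consumed from userspace with read(2).  Each record is a
 * struct inotify_event followed by ev.len bytes of NUL-padded name,
 * and a read buffer smaller than the first pending record fails with
 * EINVAL.  A sketch of a consumer loop, assuming "fd" refers to an
 * inotify descriptor:
 *
 *	char buf[sizeof(struct inotify_event) + NAME_MAX + 1];
 *	ssize_t n = read(fd, buf, sizeof(buf));
 *	for (char *p = buf; p < buf + n; ) {
 *		struct inotify_event *ev = (struct inotify_event *)p;
 *		... handle ev->wd, ev->mask, ev->name ...
 *		p += sizeof(*ev) + ev->len;
 *	}
 */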

static int
inotify_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags,
    struct thread *td)
{
	struct inotify_softc *sc;
	struct inotify_record *rec;
	int error;
	bool first;

	sc = fp->f_data;
	error = 0;

	mtx_lock(&sc->lock);
	while (STAILQ_EMPTY(&sc->pending)) {
		if ((flags & IO_NDELAY) != 0 || (fp->f_flag & FNONBLOCK) != 0) {
			mtx_unlock(&sc->lock);
			return (EWOULDBLOCK);
		}
		error = msleep(&sc->pending, &sc->lock, PCATCH, "inotify", 0);
		if (error != 0) {
			mtx_unlock(&sc->lock);
			return (error);
		}
	}
	for (first = true; !STAILQ_EMPTY(&sc->pending); first = false) {
		size_t len;

		rec = inotify_dequeue(sc);
		len = sizeof(rec->ev) + rec->ev.len;
		if (uio->uio_resid < (ssize_t)len) {
			inotify_enqueue(sc, rec, true);
			if (first) {
				error = EXTERROR(EINVAL,
				    "read buffer is too small");
			}
			break;
		}
		mtx_unlock(&sc->lock);
		error = uiomove(&rec->ev, len, uio);
#ifdef KTRACE
		if (error == 0 && KTRPOINT(td, KTR_STRUCT))
			ktrstruct("inotify", &rec->ev, len);
#endif
		mtx_lock(&sc->lock);
		if (error != 0) {
			inotify_enqueue(sc, rec, true);
			mtx_unlock(&sc->lock);
			return (error);
		}
		if (rec == &sc->overflow) {
			/*
			 * Signal to inotify_queue_record() that the overflow
			 * record can be reused.
			 */
			memset(rec, 0, sizeof(*rec));
		} else {
			free(rec, M_INOTIFY);
		}
	}
	mtx_unlock(&sc->lock);
	return (error);
}

static int
inotify_ioctl(struct file *fp, u_long com, void *data, struct ucred *cred,
    struct thread *td)
{
	struct inotify_softc *sc;

	sc = fp->f_data;

	switch (com) {
	case FIONREAD:
		*(int *)data = (int)sc->nbpending;
		return (0);
	case FIONBIO:
	case FIOASYNC:
		return (0);
	default:
		return (ENOTTY);
	}

	return (0);
}

static int
inotify_poll(struct file *fp, int events, struct ucred *cred, struct thread *td)
{
	struct inotify_softc *sc;
	int revents;

	sc = fp->f_data;
	revents = 0;

	mtx_lock(&sc->lock);
	if ((events & (POLLIN | POLLRDNORM)) != 0 && sc->npending > 0)
		revents |= events & (POLLIN | POLLRDNORM);
	else
		selrecord(td, &sc->sel);
	mtx_unlock(&sc->lock);
	return (revents);
}

static void
filt_inotifydetach(struct knote *kn)
{
	struct inotify_softc *sc;

	sc = kn->kn_hook;
	knlist_remove(&sc->sel.si_note, kn, 0);
}

static int
filt_inotifyevent(struct knote *kn, long hint)
{
	struct inotify_softc *sc;

	sc = kn->kn_hook;
	mtx_assert(&sc->lock, MA_OWNED);
	kn->kn_data = sc->nbpending;
	return (kn->kn_data > 0);
}

static int
inotify_kqfilter(struct file *fp, struct knote *kn)
{
	struct inotify_softc *sc;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);
	sc = fp->f_data;
	kn->kn_fop = &inotify_rfiltops;
	kn->kn_hook = sc;
	knlist_add(&sc->sel.si_note, kn, 0);
	return (0);
}
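
/*
 * Example (sketch): an inotify descriptor can also be monitored with
 * kevent(2), assuming "fd" is an inotify descriptor and "kq" an
 * existing kqueue:
 *
 *	struct kevent kev;
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	(void)kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * The filter fires once records are pending; the returned kevent's
 * data field reports the number of bytes available to read.
 */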

static int
inotify_stat(struct file *fp, struct stat *sb, struct ucred *cred)
{
	struct inotify_softc *sc;

	sc = fp->f_data;

	memset(sb, 0, sizeof(*sb));
	sb->st_mode = S_IFREG | S_IRUSR;
	sb->st_blksize = sizeof(struct inotify_event) + _IN_NAMESIZE(NAME_MAX);
	mtx_lock(&sc->lock);
	sb->st_size = sc->nbpending;
	sb->st_blocks = sc->npending;
	sb->st_uid = sc->cred->cr_ruid;
	sb->st_gid = sc->cred->cr_rgid;
	sb->st_ino = sc->ino;
	mtx_unlock(&sc->lock);
	return (0);
}

static void
inotify_unlink_watch_locked(struct inotify_softc *sc, struct inotify_watch *watch)
{
	struct vnode *vp;

	vp = watch->vp;
	mtx_assert(&vp->v_pollinfo->vpi_lock, MA_OWNED);

	atomic_subtract_int(&inotify_watches, 1);
	(void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0);

	TAILQ_REMOVE(&vp->v_pollinfo->vpi_inotify, watch, vlink);
	if (TAILQ_EMPTY(&vp->v_pollinfo->vpi_inotify))
		vn_irflag_unset(vp, VIRF_INOTIFY);
}

/*
 * Assumes that the watch has already been removed from its softc.
 */
static void
inotify_remove_watch(struct inotify_watch *watch)
{
	struct inotify_softc *sc;
	struct vnode *vp;

	sc = watch->sc;

	vp = watch->vp;
	mtx_lock(&vp->v_pollinfo->vpi_lock);
	inotify_unlink_watch_locked(sc, watch);
	mtx_unlock(&vp->v_pollinfo->vpi_lock);

	vrele(vp);
	free(watch, M_INOTIFY);
}

static int
inotify_close(struct file *fp, struct thread *td)
{
	struct inotify_softc *sc;
	struct inotify_record *rec;
	struct inotify_watch *watch;

	sc = fp->f_data;

	mtx_lock(&sc->lock);
	(void)chginotifycnt(sc->cred->cr_ruidinfo, -1, 0);
	while ((watch = RB_MIN(inotify_watch_tree, &sc->watches)) != NULL) {
		RB_REMOVE(inotify_watch_tree, &sc->watches, watch);
		mtx_unlock(&sc->lock);
		inotify_remove_watch(watch);
		mtx_lock(&sc->lock);
	}
	while (!STAILQ_EMPTY(&sc->pending)) {
		rec = inotify_dequeue(sc);
		if (rec != &sc->overflow)
			free(rec, M_INOTIFY);
	}
	mtx_unlock(&sc->lock);
	seldrain(&sc->sel);
	knlist_destroy(&sc->sel.si_note);
	mtx_destroy(&sc->lock);
	crfree(sc->cred);
	free(sc, M_INOTIFY);
	return (0);
}

static int
inotify_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{
	struct inotify_softc *sc;

	sc = fp->f_data;

	kif->kf_type = KF_TYPE_INOTIFY;
	kif->kf_un.kf_inotify.kf_inotify_npending = sc->npending;
	kif->kf_un.kf_inotify.kf_inotify_nbpending = sc->nbpending;
	return (0);
}

int
inotify_create_file(struct thread *td, struct file *fp, int flags, int *fflagsp)
{
	struct inotify_softc *sc;
	int fflags;

	if ((flags & ~(IN_NONBLOCK | IN_CLOEXEC)) != 0)
		return (EINVAL);

	if (!chginotifycnt(td->td_ucred->cr_ruidinfo, 1,
	    inotify_max_user_instances))
		return (EMFILE);

	sc = malloc(sizeof(*sc), M_INOTIFY, M_WAITOK | M_ZERO);
	sc->nextwatch = 1;		/* Required for compatibility. */
	STAILQ_INIT(&sc->pending);
	RB_INIT(&sc->watches);
	mtx_init(&sc->lock, "inotify", NULL, MTX_DEF);
	knlist_init_mtx(&sc->sel.si_note, &sc->lock);
	sc->cred = crhold(td->td_ucred);
	sc->ino = atomic_fetchadd_64(&inotify_ino, 1);

	fflags = FREAD;
	if ((flags & IN_NONBLOCK) != 0)
		fflags |= FNONBLOCK;
	if ((flags & IN_CLOEXEC) != 0)
		*fflagsp |= O_CLOEXEC;
	finit(fp, fflags, DTYPE_INOTIFY, sc, &inotifyfdops);

	return (0);
}
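
/*
 * Example (sketch): creating an inotify descriptor and adding a watch
 * from userspace, assuming the Linux-compatible libc wrappers
 * inotify_init1() and inotify_add_watch() are available:
 *
 *	int fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
 *	int wd = inotify_add_watch(fd, "/some/dir", IN_CREATE | IN_DELETE);
 */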

static struct inotify_record *
inotify_alloc_record(uint32_t wd, const char *name, size_t namelen, int event,
    uint32_t cookie, int waitok)
{
	struct inotify_event *evp;
	struct inotify_record *rec;

	rec = malloc(sizeof(*rec) + _IN_NAMESIZE(namelen), M_INOTIFY,
	    waitok | M_ZERO);
	if (rec == NULL)
		return (NULL);
	evp = &rec->ev;
	evp->wd = wd;
	evp->mask = event;
	evp->cookie = cookie;
	evp->len = _IN_NAMESIZE(namelen);
	if (name != NULL)
		memcpy(evp->name, name, namelen);
	return (rec);
}

static bool
inotify_can_coalesce(struct inotify_softc *sc, struct inotify_event *evp)
{
	struct inotify_record *prev;

	mtx_assert(&sc->lock, MA_OWNED);

	prev = STAILQ_LAST(&sc->pending, inotify_record, link);
	return (prev != NULL && prev->ev.mask == evp->mask &&
	    prev->ev.wd == evp->wd && prev->ev.cookie == evp->cookie &&
	    prev->ev.len == evp->len &&
	    memcmp(prev->ev.name, evp->name, evp->len) == 0);
}

static void
inotify_overflow_event(struct inotify_event *evp)
{
	evp->mask = IN_Q_OVERFLOW;
	evp->wd = -1;
	evp->cookie = 0;
	evp->len = 0;
}

/*
 * Put an event record on the queue for an inotify descriptor.  Return false if
 * the record was not enqueued for some reason, true otherwise.
 */
static bool
inotify_queue_record(struct inotify_softc *sc, struct inotify_record *rec)
{
	struct inotify_event *evp;

	mtx_assert(&sc->lock, MA_OWNED);

	evp = &rec->ev;
	if (__predict_false(rec == &sc->overflow)) {
		/*
		 * Is the overflow record already in the queue? If so, there's
		 * not much else we can do: we're here because a kernel memory
		 * shortage prevented new record allocations.
		 */
		counter_u64_add(inotify_event_drops, 1);
		if (evp->mask == IN_Q_OVERFLOW)
			return (false);
		inotify_overflow_event(evp);
	} else {
		/* Try to coalesce duplicate events. */
		if (inotify_coalesce && inotify_can_coalesce(sc, evp))
			return (false);

		/*
		 * Would this one overflow the queue? If so, convert it to an
		 * overflow event and try again to coalesce.
		 */
		if (sc->npending >= inotify_max_queued_events) {
			counter_u64_add(inotify_event_drops, 1);
			inotify_overflow_event(evp);
			if (inotify_can_coalesce(sc, evp))
				return (false);
		}
	}
	inotify_enqueue(sc, rec, false);
	selwakeup(&sc->sel);
	KNOTE_LOCKED(&sc->sel.si_note, 0);
	wakeup(&sc->pending);
	return (true);
}
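
/*
 * Note: when the queue is full or record allocation fails, the softc's
 * preallocated overflow record is queued in place of the lost event(s).
 * Userspace reads it back as a single event with mask IN_Q_OVERFLOW and
 * wd == -1, indicating that some events were dropped.
 */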

static int
inotify_log_one(struct inotify_watch *watch, const char *name, size_t namelen,
    int event, uint32_t cookie)
{
	struct inotify_watch key;
	struct inotify_softc *sc;
	struct inotify_record *rec;
	int relecount;
	bool allocfail;

	relecount = 0;

	sc = watch->sc;
	rec = inotify_alloc_record(watch->wd, name, namelen, event, cookie,
	    M_NOWAIT);
	if (rec == NULL) {
		rec = &sc->overflow;
		allocfail = true;
	} else {
		allocfail = false;
	}

	mtx_lock(&sc->lock);
	if (!inotify_queue_record(sc, rec) && rec != &sc->overflow)
		free(rec, M_INOTIFY);
	if ((watch->mask & IN_ONESHOT) != 0 ||
	    (event & (IN_DELETE_SELF | IN_UNMOUNT)) != 0) {
		if (!allocfail) {
			rec = inotify_alloc_record(watch->wd, NULL, 0,
			    IN_IGNORED, 0, M_NOWAIT);
			if (rec == NULL)
				rec = &sc->overflow;
			if (!inotify_queue_record(sc, rec) &&
			    rec != &sc->overflow)
				free(rec, M_INOTIFY);
		}

		/*
		 * Remove the watch, taking care to handle races with
		 * inotify_close().
		 */
		key.wd = watch->wd;
		if (RB_FIND(inotify_watch_tree, &sc->watches, &key) != NULL) {
			RB_REMOVE(inotify_watch_tree, &sc->watches, watch);
			inotify_unlink_watch_locked(sc, watch);
			free(watch, M_INOTIFY);

			/* Defer vrele() until locks are dropped. */
			relecount++;
		}
	}
	mtx_unlock(&sc->lock);
	return (relecount);
}

void
inotify_log(struct vnode *vp, const char *name, size_t namelen, int event,
    uint32_t cookie)
{
	struct inotify_watch *watch, *tmp;
	int relecount;

	KASSERT((event & ~(IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT)) == 0,
	    ("inotify_log: invalid event %#x", event));

	relecount = 0;
	mtx_lock(&vp->v_pollinfo->vpi_lock);
	TAILQ_FOREACH_SAFE(watch, &vp->v_pollinfo->vpi_inotify, vlink, tmp) {
		KASSERT(watch->vp == vp,
		    ("inotify_log: watch %p vp != vp", watch));
		if ((watch->mask & event) != 0 || event == IN_UNMOUNT) {
			relecount += inotify_log_one(watch, name, namelen,
			    event, cookie);
		}
	}
	mtx_unlock(&vp->v_pollinfo->vpi_lock);

	for (int i = 0; i < relecount; i++)
		vrele(vp);
}

/*
 * An inotify event occurred on a watched vnode.
 */
void
vn_inotify(struct vnode *vp, struct vnode *dvp, struct componentname *cnp,
    int event, uint32_t cookie)
{
	int isdir;

	VNPASS(vp->v_holdcnt > 0, vp);

	isdir = vp->v_type == VDIR ? IN_ISDIR : 0;

	if (dvp != NULL) {
		VNPASS(dvp->v_holdcnt > 0, dvp);

		/*
		 * Should we log an event for the vnode itself?
		 */
		if ((vn_irflag_read(vp) & VIRF_INOTIFY) != 0) {
			int selfevent;

			switch (event) {
			case _IN_MOVE_DELETE:
			case IN_DELETE:
				/*
				 * IN_DELETE_SELF is only generated when the
				 * last hard link of a file is removed.
				 */
				selfevent = IN_DELETE_SELF;
				if (vp->v_type != VDIR) {
					struct vattr va;
					int error;

					error = VOP_GETATTR(vp, &va,
					    cnp->cn_cred);
					if (error == 0 && va.va_nlink != 0)
						selfevent = 0;
				}
				break;
			case IN_MOVED_FROM:
				cookie = 0;
				selfevent = IN_MOVE_SELF;
				break;
			case _IN_ATTRIB_LINKCOUNT:
				selfevent = IN_ATTRIB;
				break;
			default:
				selfevent = event;
				break;
			}

			if ((selfevent & ~_IN_DIR_EVENTS) != 0) {
				inotify_log(vp, NULL, 0, selfevent | isdir,
				    cookie);
			}
		}

		/*
		 * Something is watching the directory through which this vnode
		 * was referenced, so we may need to log the event.
		 */
		if ((event & IN_ALL_EVENTS) != 0 &&
		    (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0) {
			inotify_log(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, event | isdir, cookie);
		}
	} else {
		/*
		 * We don't know which watched directory might contain the
		 * vnode, so we have to fall back to searching the name cache.
		 */
		cache_vop_inotify(vp, event, cookie);
	}
}

int
vn_inotify_add_watch(struct vnode *vp, struct inotify_softc *sc, uint32_t mask,
    uint32_t *wdp, struct thread *td)
{
	struct inotify_watch *watch, *watch1;
	uint32_t wd;

	/*
	 * If this is a directory, make sure all of its entries are present in
	 * the name cache so that we're able to look them up if an event occurs.
	 * The persistent reference on the directory prevents the outgoing name
	 * cache entries from being reclaimed.
	 */
	if (vp->v_type == VDIR) {
		struct dirent *dp;
		char *buf;
		off_t off;
		size_t buflen, len;
		int eof, error;

		buflen = 128 * sizeof(struct dirent);
		buf = malloc(buflen, M_TEMP, M_WAITOK);

		error = 0;
		len = off = eof = 0;
		for (;;) {
			struct nameidata nd;

			error = vn_dir_next_dirent(vp, td, buf, buflen, &dp,
			    &len, &off, &eof);
			if (error != 0)
				break;
			if (len == 0)
				/* Finished reading. */
				break;
			if (strcmp(dp->d_name, ".") == 0 ||
			    strcmp(dp->d_name, "..") == 0)
				continue;

			/*
			 * namei() consumes a reference on the starting
			 * directory if it's specified as a vnode.
			 */
			vrefact(vp);
			VOP_UNLOCK(vp);
			NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE,
			    dp->d_name, vp);
			error = namei(&nd);
			vn_lock(vp, LK_SHARED | LK_RETRY);
			if (error != 0)
				break;
			vn_irflag_set_cond(nd.ni_vp, VIRF_INOTIFY_PARENT);
			vrele(nd.ni_vp);
		}
		free(buf, M_TEMP);
		if (error != 0)
			return (error);
	}

	/*
	 * The vnode referenced in kern_inotify_add_watch() might be different
	 * than this one if nullfs is in the picture.
	 */
	vrefact(vp);
	watch = malloc(sizeof(*watch), M_INOTIFY, M_WAITOK | M_ZERO);
	watch->sc = sc;
	watch->vp = vp;
	watch->mask = mask;

	/*
	 * Are we updating an existing watch? Search the vnode's list rather
	 * than that of the softc, as the former is likely to be shorter.
	 */
	v_addpollinfo(vp);
	mtx_lock(&vp->v_pollinfo->vpi_lock);
	TAILQ_FOREACH(watch1, &vp->v_pollinfo->vpi_inotify, vlink) {
		if (watch1->sc == sc)
			break;
	}
	mtx_lock(&sc->lock);
	if (watch1 != NULL) {
		mtx_unlock(&vp->v_pollinfo->vpi_lock);

		/*
		 * We found an existing watch, update it based on our flags.
		 */
		if ((mask & IN_MASK_CREATE) != 0) {
			mtx_unlock(&sc->lock);
			vrele(vp);
			free(watch, M_INOTIFY);
			return (EEXIST);
		}
		if ((mask & IN_MASK_ADD) != 0)
			watch1->mask |= mask;
		else
			watch1->mask = mask;
		*wdp = watch1->wd;
		mtx_unlock(&sc->lock);
		vrele(vp);
		free(watch, M_INOTIFY);
		return (EJUSTRETURN);
	}

	/*
	 * We're creating a new watch.  Add it to the softc and vnode watch
	 * lists.
	 */
	do {
		struct inotify_watch key;

		/*
		 * Search for the next available watch descriptor.  This is
		 * implemented so as to avoid reusing watch descriptors for as
		 * long as possible.
		 */
		key.wd = wd = sc->nextwatch++;
		watch1 = RB_FIND(inotify_watch_tree, &sc->watches, &key);
	} while (watch1 != NULL || wd == 0);
	watch->wd = wd;
	RB_INSERT(inotify_watch_tree, &sc->watches, watch);
	TAILQ_INSERT_TAIL(&vp->v_pollinfo->vpi_inotify, watch, vlink);
	mtx_unlock(&sc->lock);
	mtx_unlock(&vp->v_pollinfo->vpi_lock);
	vn_irflag_set_cond(vp, VIRF_INOTIFY);

	*wdp = wd;

	return (0);
}
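
/*
 * Note: vn_inotify_add_watch() returns EJUSTRETURN when it merely
 * updated an existing watch instead of creating a new one;
 * kern_inotify_add_watch() converts that into success after undoing
 * its watch-count accounting.
 */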

void
vn_inotify_revoke(struct vnode *vp)
{
	if (vp->v_pollinfo == NULL) {
		/* This is a nullfs vnode which shadows a watched vnode. */
		return;
	}
	inotify_log(vp, NULL, 0, IN_UNMOUNT, 0);
}

static int
fget_inotify(struct thread *td, int fd, const cap_rights_t *needrightsp,
    struct file **fpp)
{
	struct file *fp;
	int error;

	error = fget(td, fd, needrightsp, &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_INOTIFY) {
		fdrop(fp, td);
		return (EINVAL);
	}
	*fpp = fp;
	return (0);
}

int
kern_inotify_add_watch(int fd, int dfd, const char *path, uint32_t mask,
    struct thread *td)
{
	struct nameidata nd;
	struct file *fp;
	struct inotify_softc *sc;
	struct vnode *vp;
	uint32_t wd;
	int count, error;

	fp = NULL;
	vp = NULL;

	if ((mask & IN_ALL_EVENTS) == 0)
		return (EXTERROR(EINVAL, "no events specified"));
	if ((mask & (IN_MASK_ADD | IN_MASK_CREATE)) ==
	    (IN_MASK_ADD | IN_MASK_CREATE))
		return (EXTERROR(EINVAL,
		    "IN_MASK_ADD and IN_MASK_CREATE are mutually exclusive"));
	if ((mask & ~(IN_ALL_EVENTS | _IN_ALL_FLAGS | IN_UNMOUNT)) != 0)
		return (EXTERROR(EINVAL, "unrecognized flag"));

	error = fget_inotify(td, fd, &cap_inotify_add_rights, &fp);
	if (error != 0)
		return (error);
	sc = fp->f_data;

	NDINIT_AT(&nd, LOOKUP,
	    ((mask & IN_DONT_FOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF |
	    LOCKSHARED | AUDITVNODE1, UIO_USERSPACE, path, dfd);
	error = namei(&nd);
	if (error != 0)
		goto out;
	NDFREE_PNBUF(&nd);
	vp = nd.ni_vp;

	error = VOP_ACCESS(vp, VREAD, td->td_ucred, td);
	if (error != 0)
		goto out;

	if ((mask & IN_ONLYDIR) != 0 && vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	count = atomic_fetchadd_int(&inotify_watches, 1);
	if (count > inotify_max_watches) {
		atomic_subtract_int(&inotify_watches, 1);
		error = ENOSPC;
		goto out;
	}
	if (!chginotifywatchcnt(sc->cred->cr_ruidinfo, 1,
	    inotify_max_user_watches)) {
		atomic_subtract_int(&inotify_watches, 1);
		error = ENOSPC;
		goto out;
	}
	error = VOP_INOTIFY_ADD_WATCH(vp, sc, mask, &wd, td);
	if (error != 0) {
		atomic_subtract_int(&inotify_watches, 1);
		(void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0);
		if (error == EJUSTRETURN) {
			/* We updated an existing watch, everything is ok. */
			error = 0;
		} else {
			goto out;
		}
	}
	td->td_retval[0] = wd;

out:
	if (vp != NULL)
		vput(vp);
	fdrop(fp, td);
	return (error);
}

int
sys_inotify_add_watch_at(struct thread *td,
    struct inotify_add_watch_at_args *uap)
{
	return (kern_inotify_add_watch(uap->fd, uap->dfd, uap->path,
	    uap->mask, td));
}
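
/*
 * Example (sketch): inotify_add_watch_at() resolves a relative path
 * against a directory descriptor, assuming the corresponding libc
 * wrapper is available:
 *
 *	int wd = inotify_add_watch_at(fd, dfd, "subdir",
 *	    IN_CREATE | IN_DELETE | IN_ONLYDIR);
 */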

int
kern_inotify_rm_watch(int fd, uint32_t wd, struct thread *td)
{
	struct file *fp;
	struct inotify_softc *sc;
	struct inotify_record *rec;
	struct inotify_watch key, *watch;
	int error;

	error = fget_inotify(td, fd, &cap_inotify_rm_rights, &fp);
	if (error != 0)
		return (error);
	sc = fp->f_data;

	rec = inotify_alloc_record(wd, NULL, 0, IN_IGNORED, 0, M_WAITOK);

	/*
	 * For compatibility with Linux, we do not remove pending events
	 * associated with the watch.  Watch descriptors are implemented so as
	 * to avoid being reused for as long as possible, so one hopes that any
	 * pending events from the removed watch descriptor will be removed
	 * before the watch descriptor is recycled.
	 */
	key.wd = wd;
	mtx_lock(&sc->lock);
	watch = RB_FIND(inotify_watch_tree, &sc->watches, &key);
	if (watch == NULL) {
		free(rec, M_INOTIFY);
		error = EINVAL;
	} else {
		RB_REMOVE(inotify_watch_tree, &sc->watches, watch);
		if (!inotify_queue_record(sc, rec)) {
			free(rec, M_INOTIFY);
			error = 0;
		}
	}
	mtx_unlock(&sc->lock);
	if (watch != NULL)
		inotify_remove_watch(watch);
	fdrop(fp, td);
	return (error);
}

int
sys_inotify_rm_watch(struct thread *td, struct inotify_rm_watch_args *uap)
{
	return (kern_inotify_rm_watch(uap->fd, uap->wd, td));
}