/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2025 Klara, Inc.
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/caprights.h>
#include <sys/counter.h>
#include <sys/dirent.h>
#define EXTERR_CATEGORY EXTERR_CAT_INOTIFY
#include <sys/exterrvar.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/inotify.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/ktrace.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/selinfo.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslimits.h>
#include <sys/sysproto.h>
#include <sys/tree.h>
#include <sys/user.h>
#include <sys/vnode.h>

uint32_t inotify_rename_cookie;

static SYSCTL_NODE(_vfs, OID_AUTO, inotify, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "inotify configuration");

static int inotify_max_queued_events = 16384;
SYSCTL_INT(_vfs_inotify, OID_AUTO, max_queued_events, CTLFLAG_RWTUN,
    &inotify_max_queued_events, 0,
    "Maximum number of events to queue on an inotify descriptor");

static int inotify_max_user_instances = 256;
SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_instances, CTLFLAG_RWTUN,
    &inotify_max_user_instances, 0,
    "Maximum number of inotify descriptors per user");

static int inotify_max_user_watches;
SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_watches, CTLFLAG_RWTUN,
    &inotify_max_user_watches, 0,
    "Maximum number of inotify watches per user");

static int inotify_max_watches;
SYSCTL_INT(_vfs_inotify, OID_AUTO, max_watches, CTLFLAG_RWTUN,
    &inotify_max_watches, 0,
    "Maximum number of inotify watches system-wide");

static int inotify_watches;
SYSCTL_INT(_vfs_inotify, OID_AUTO, watches, CTLFLAG_RD,
    &inotify_watches, 0,
    "Total number of inotify watches currently in use");

static int inotify_coalesce = 1;
SYSCTL_INT(_vfs_inotify, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &inotify_coalesce, 0,
    "Coalesce inotify events when possible");

static COUNTER_U64_DEFINE_EARLY(inotify_event_drops);
SYSCTL_COUNTER_U64(_vfs_inotify, OID_AUTO, event_drops, CTLFLAG_RD,
    &inotify_event_drops,
    "Number of inotify events dropped due to limits or allocation failures");

static fo_rdwr_t	inotify_read;
static fo_ioctl_t	inotify_ioctl;
static fo_poll_t	inotify_poll;
static fo_kqfilter_t	inotify_kqfilter;
static fo_stat_t	inotify_stat;
static fo_close_t	inotify_close;
static fo_fill_kinfo_t	inotify_fill_kinfo;

static const struct fileops inotifyfdops = {
	.fo_read = inotify_read,
	.fo_write = invfo_rdwr,
	.fo_truncate = invfo_truncate,
	.fo_ioctl = inotify_ioctl,
	.fo_poll = inotify_poll,
	.fo_kqfilter = inotify_kqfilter,
	.fo_stat = inotify_stat,
	.fo_close = inotify_close,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
	.fo_fill_kinfo = inotify_fill_kinfo,
	.fo_cmp = file_kcmp_generic,
	.fo_flags = DFLAG_PASSABLE,
};

static void	filt_inotifydetach(struct knote *kn);
static int	filt_inotifyevent(struct knote *kn, long hint);

static const struct filterops inotify_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_inotifydetach,
	.f_event = filt_inotifyevent,
};

static MALLOC_DEFINE(M_INOTIFY, "inotify", "inotify data structures");

struct inotify_record {
	STAILQ_ENTRY(inotify_record) link;
	struct inotify_event	ev;
};

static uint64_t inotify_ino = 1;

/*
 * On LP64 systems this occupies 64 bytes, so we don't get internal
 * fragmentation by allocating watches with malloc(9).  If the size changes,
 * consider using a UMA zone to improve memory efficiency.
 */
struct inotify_watch {
	struct inotify_softc *sc;	/* back-pointer */
	int	wd;			/* unique ID */
	uint32_t mask;			/* event mask */
	struct vnode *vp;		/* vnode being watched, refed */
	RB_ENTRY(inotify_watch) ilink;	/* inotify linkage */
	TAILQ_ENTRY(inotify_watch) vlink; /* vnode linkage */
};

static void
inotify_init(void *arg __unused)
{
	/* Don't let a user hold too many vnodes. */
	inotify_max_user_watches = desiredvnodes / 3;
	/* Don't let the system hold too many vnodes. */
	inotify_max_watches = desiredvnodes / 2;
}
SYSINIT(inotify, SI_SUB_VFS, SI_ORDER_ANY, inotify_init, NULL);

static int
inotify_watch_cmp(const struct inotify_watch *a,
    const struct inotify_watch *b)
{
	if (a->wd < b->wd)
		return (-1);
	else if (a->wd > b->wd)
		return (1);
	else
		return (0);
}
RB_HEAD(inotify_watch_tree, inotify_watch);
RB_GENERATE_STATIC(inotify_watch_tree, inotify_watch, ilink, inotify_watch_cmp);

struct inotify_softc {
	struct mtx lock;		/* serialize all softc writes */
	STAILQ_HEAD(, inotify_record) pending; /* events waiting to be read */
	struct inotify_record overflow;	/* preallocated record */
	int nextwatch;			/* next watch ID to try */
	int npending;			/* number of pending events */
	size_t nbpending;		/* bytes available to read */
	uint64_t ino;			/* unique identifier */
	struct inotify_watch_tree watches; /* active watches */
	struct selinfo sel;		/* select/poll/kevent info */
	struct ucred *cred;		/* credential ref */
};

static struct inotify_record *
inotify_dequeue(struct inotify_softc *sc)
{
	struct inotify_record *rec;

	mtx_assert(&sc->lock, MA_OWNED);
	KASSERT(!STAILQ_EMPTY(&sc->pending),
	    ("%s: queue for %p is empty", __func__, sc));

	rec = STAILQ_FIRST(&sc->pending);
	STAILQ_REMOVE_HEAD(&sc->pending, link);
	sc->npending--;
	sc->nbpending -= sizeof(rec->ev) + rec->ev.len;
	return (rec);
}

static void
inotify_enqueue(struct inotify_softc *sc, struct inotify_record *rec, bool head)
{
	mtx_assert(&sc->lock, MA_OWNED);

	if (head)
		STAILQ_INSERT_HEAD(&sc->pending, rec, link);
	else
		STAILQ_INSERT_TAIL(&sc->pending, rec, link);
	sc->npending++;
	sc->nbpending += sizeof(rec->ev) + rec->ev.len;
}

static int
inotify_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags,
    struct thread *td)
{
	struct inotify_softc *sc;
	struct inotify_record *rec;
	int error;
	bool first;

	sc = fp->f_data;
	error = 0;

	mtx_lock(&sc->lock);
	while (STAILQ_EMPTY(&sc->pending)) {
		if ((flags & IO_NDELAY) != 0 || (fp->f_flag & FNONBLOCK) != 0) {
			mtx_unlock(&sc->lock);
			return (EWOULDBLOCK);
		}
		error = msleep(&sc->pending, &sc->lock, PCATCH, "inotify", 0);
		if (error != 0) {
			mtx_unlock(&sc->lock);
			return (error);
		}
	}
	for (first = true; !STAILQ_EMPTY(&sc->pending); first = false) {
		size_t len;

		rec = inotify_dequeue(sc);
		len = sizeof(rec->ev) + rec->ev.len;
		if (uio->uio_resid < (ssize_t)len) {
			inotify_enqueue(sc, rec, true);
			if (first) {
				error = EXTERROR(EINVAL,
				    "read buffer is too small");
			}
			break;
		}
		mtx_unlock(&sc->lock);
		error = uiomove(&rec->ev, len, uio);
#ifdef KTRACE
		if (error == 0 && KTRPOINT(td, KTR_STRUCT))
			ktrstruct("inotify", &rec->ev, len);
#endif
		mtx_lock(&sc->lock);
		if (error != 0) {
			inotify_enqueue(sc, rec, true);
			mtx_unlock(&sc->lock);
			return (error);
		}
		if (rec == &sc->overflow) {
			/*
			 * Signal to inotify_queue_record() that the overflow
			 * record can be reused.
			 */
			memset(rec, 0, sizeof(*rec));
		} else {
			free(rec, M_INOTIFY);
		}
	}
	mtx_unlock(&sc->lock);
	return (error);
}

static int
inotify_ioctl(struct file *fp, u_long com, void *data, struct ucred *cred,
    struct thread *td)
{
	struct inotify_softc *sc;

	sc = fp->f_data;

	switch (com) {
	case FIONREAD:
		*(int *)data = (int)sc->nbpending;
		return (0);
	case FIONBIO:
	case FIOASYNC:
		return (0);
	default:
		return (ENOTTY);
	}

	return (0);
}

static int
inotify_poll(struct file *fp, int events, struct ucred *cred, struct thread *td)
{
	struct inotify_softc *sc;
	int revents;

	sc = fp->f_data;
	revents = 0;

	mtx_lock(&sc->lock);
	if ((events & (POLLIN | POLLRDNORM)) != 0 && sc->npending > 0)
		revents |= events & (POLLIN | POLLRDNORM);
	else
		selrecord(td, &sc->sel);
	mtx_unlock(&sc->lock);
	return (revents);
}

static void
filt_inotifydetach(struct knote *kn)
{
	struct inotify_softc *sc;

	sc = kn->kn_hook;
	knlist_remove(&sc->sel.si_note, kn, 0);
}

static int
filt_inotifyevent(struct knote *kn, long hint)
{
	struct inotify_softc *sc;

	sc = kn->kn_hook;
	mtx_assert(&sc->lock, MA_OWNED);
	kn->kn_data = sc->nbpending;
	return (kn->kn_data > 0);
}

static int
inotify_kqfilter(struct file *fp, struct knote *kn)
{
	struct inotify_softc *sc;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);
	sc = fp->f_data;
	kn->kn_fop = &inotify_rfiltops;
	kn->kn_hook = sc;
	knlist_add(&sc->sel.si_note, kn, 0);
	return (0);
}

static int
inotify_stat(struct file *fp, struct stat *sb, struct ucred *cred)
{
	struct inotify_softc *sc;

	sc = fp->f_data;

	memset(sb, 0, sizeof(*sb));
	sb->st_mode = S_IFREG | S_IRUSR;
	sb->st_blksize = sizeof(struct inotify_event) + _IN_NAMESIZE(NAME_MAX);
	mtx_lock(&sc->lock);
	sb->st_size = sc->nbpending;
	sb->st_blocks = sc->npending;
	sb->st_uid = sc->cred->cr_ruid;
	sb->st_gid = sc->cred->cr_rgid;
	sb->st_ino = sc->ino;
	mtx_unlock(&sc->lock);
	return (0);
}

static void
inotify_unlink_watch_locked(struct inotify_softc *sc, struct inotify_watch *watch)
{
	struct vnode *vp;

	vp = watch->vp;
	mtx_assert(&vp->v_pollinfo->vpi_lock, MA_OWNED);

	atomic_subtract_int(&inotify_watches, 1);
	(void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0);

	TAILQ_REMOVE(&vp->v_pollinfo->vpi_inotify, watch, vlink);
	if (TAILQ_EMPTY(&vp->v_pollinfo->vpi_inotify))
		vn_irflag_unset_locked(vp, VIRF_INOTIFY);
}

/*
 * Assumes that the watch has already been removed from its softc.
 */
static void
inotify_remove_watch(struct inotify_watch *watch)
{
	struct inotify_softc *sc;
	struct vnode *vp;

	sc = watch->sc;

	vp = watch->vp;
	mtx_lock(&vp->v_pollinfo->vpi_lock);
	inotify_unlink_watch_locked(sc, watch);
	mtx_unlock(&vp->v_pollinfo->vpi_lock);

	vrele(vp);
	free(watch, M_INOTIFY);
}

static int
inotify_close(struct file *fp, struct thread *td)
{
	struct inotify_softc *sc;
	struct inotify_record *rec;
	struct inotify_watch *watch;

	sc = fp->f_data;

	mtx_lock(&sc->lock);
	(void)chginotifycnt(sc->cred->cr_ruidinfo, -1, 0);
	while ((watch = RB_MIN(inotify_watch_tree, &sc->watches)) != NULL) {
		RB_REMOVE(inotify_watch_tree, &sc->watches, watch);
		mtx_unlock(&sc->lock);
		inotify_remove_watch(watch);
		mtx_lock(&sc->lock);
	}
	while (!STAILQ_EMPTY(&sc->pending)) {
		rec = inotify_dequeue(sc);
		if (rec != &sc->overflow)
			free(rec, M_INOTIFY);
	}
	mtx_unlock(&sc->lock);
	seldrain(&sc->sel);
	knlist_destroy(&sc->sel.si_note);
	mtx_destroy(&sc->lock);
	crfree(sc->cred);
	free(sc, M_INOTIFY);
	return (0);
}

static int
inotify_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{
	struct inotify_softc *sc;

	sc = fp->f_data;

	kif->kf_type = KF_TYPE_INOTIFY;
	kif->kf_un.kf_inotify.kf_inotify_npending = sc->npending;
	kif->kf_un.kf_inotify.kf_inotify_nbpending = sc->nbpending;
	return (0);
}

int
inotify_create_file(struct thread *td, struct file *fp, int flags, int *fflagsp)
{
	struct inotify_softc *sc;
	int fflags;

	if ((flags & ~(IN_NONBLOCK | IN_CLOEXEC)) != 0)
		return (EINVAL);

	if (!chginotifycnt(td->td_ucred->cr_ruidinfo, 1,
	    inotify_max_user_instances))
		return (EMFILE);

	sc = malloc(sizeof(*sc), M_INOTIFY, M_WAITOK | M_ZERO);
	sc->nextwatch = 1; /* Required for compatibility. */
	STAILQ_INIT(&sc->pending);
	RB_INIT(&sc->watches);
	mtx_init(&sc->lock, "inotify", NULL, MTX_DEF);
	knlist_init_mtx(&sc->sel.si_note, &sc->lock);
	sc->cred = crhold(td->td_ucred);
	sc->ino = atomic_fetchadd_64(&inotify_ino, 1);

	fflags = FREAD;
	if ((flags & IN_NONBLOCK) != 0)
		fflags |= FNONBLOCK;
	if ((flags & IN_CLOEXEC) != 0)
		*fflagsp |= O_CLOEXEC;
	finit(fp, fflags, DTYPE_INOTIFY, sc, &inotifyfdops);

	return (0);
}

static struct inotify_record *
inotify_alloc_record(uint32_t wd, const char *name, size_t namelen, int event,
    uint32_t cookie, int waitok)
{
	struct inotify_event *evp;
	struct inotify_record *rec;

	rec = malloc(sizeof(*rec) + _IN_NAMESIZE(namelen), M_INOTIFY,
	    waitok | M_ZERO);
	if (rec == NULL)
		return (NULL);
	evp = &rec->ev;
	evp->wd = wd;
	evp->mask = event;
	evp->cookie = cookie;
	evp->len = _IN_NAMESIZE(namelen);
	if (name != NULL)
		memcpy(evp->name, name, namelen);
	return (rec);
}

static bool
inotify_can_coalesce(struct inotify_softc *sc, struct inotify_event *evp)
{
	struct inotify_record *prev;

	mtx_assert(&sc->lock, MA_OWNED);

	prev = STAILQ_LAST(&sc->pending, inotify_record, link);
	return (prev != NULL && prev->ev.mask == evp->mask &&
	    prev->ev.wd == evp->wd && prev->ev.cookie == evp->cookie &&
	    prev->ev.len == evp->len &&
	    (evp->len == 0 || strcmp(prev->ev.name, evp->name) == 0));
}

static void
inotify_overflow_event(struct inotify_event *evp)
{
	evp->mask = IN_Q_OVERFLOW;
	evp->wd = -1;
	evp->cookie = 0;
	evp->len = 0;
}

/*
 * Put an event record on the queue for an inotify descriptor.  Return false if
 * the record was not enqueued for some reason, true otherwise.
 */
static bool
inotify_queue_record(struct inotify_softc *sc, struct inotify_record *rec)
{
	struct inotify_event *evp;

	mtx_assert(&sc->lock, MA_OWNED);

	evp = &rec->ev;
	if (__predict_false(rec == &sc->overflow)) {
		/*
		 * Is the overflow record already in the queue?  If so, there's
		 * not much else we can do: we're here because a kernel memory
		 * shortage prevented new record allocations.
		 */
		counter_u64_add(inotify_event_drops, 1);
		if (evp->mask == IN_Q_OVERFLOW)
			return (false);
		inotify_overflow_event(evp);
	} else {
		/* Try to coalesce duplicate events. */
		if (inotify_coalesce && inotify_can_coalesce(sc, evp))
			return (false);

		/*
		 * Would this one overflow the queue?  If so, convert it to an
		 * overflow event and try again to coalesce.
		 */
		if (sc->npending >= inotify_max_queued_events) {
			counter_u64_add(inotify_event_drops, 1);
			inotify_overflow_event(evp);
			if (inotify_can_coalesce(sc, evp))
				return (false);
		}
	}
	inotify_enqueue(sc, rec, false);
	selwakeup(&sc->sel);
	KNOTE_LOCKED(&sc->sel.si_note, 0);
	wakeup(&sc->pending);
	return (true);
}

static int
inotify_log_one(struct inotify_watch *watch, const char *name, size_t namelen,
    int event, uint32_t cookie)
{
	struct inotify_watch key;
	struct inotify_softc *sc;
	struct inotify_record *rec;
	int relecount;
	bool allocfail;

	relecount = 0;

	sc = watch->sc;
	rec = inotify_alloc_record(watch->wd, name, namelen, event, cookie,
	    M_NOWAIT);
	if (rec == NULL) {
		rec = &sc->overflow;
		allocfail = true;
	} else {
		allocfail = false;
	}

	mtx_lock(&sc->lock);
	if (!inotify_queue_record(sc, rec) && rec != &sc->overflow)
		free(rec, M_INOTIFY);
	if ((watch->mask & IN_ONESHOT) != 0 ||
	    (event & (IN_DELETE_SELF | IN_UNMOUNT)) != 0) {
		if (!allocfail) {
			rec = inotify_alloc_record(watch->wd, NULL, 0,
			    IN_IGNORED, 0, M_NOWAIT);
			if (rec == NULL)
				rec = &sc->overflow;
			if (!inotify_queue_record(sc, rec) &&
			    rec != &sc->overflow)
				free(rec, M_INOTIFY);
		}

		/*
		 * Remove the watch, taking care to handle races with
		 * inotify_close().
		 */
		key.wd = watch->wd;
		if (RB_FIND(inotify_watch_tree, &sc->watches, &key) != NULL) {
			RB_REMOVE(inotify_watch_tree, &sc->watches, watch);
			inotify_unlink_watch_locked(sc, watch);
			free(watch, M_INOTIFY);

			/* Defer vrele() until locks are dropped. */
			relecount++;
		}
	}
	mtx_unlock(&sc->lock);
	return (relecount);
}

void
inotify_log(struct vnode *vp, const char *name, size_t namelen, int event,
    uint32_t cookie)
{
	struct inotify_watch *watch, *tmp;
	int relecount;

	KASSERT((event & ~(IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT)) == 0,
	    ("inotify_log: invalid event %#x", event));

	relecount = 0;
	mtx_lock(&vp->v_pollinfo->vpi_lock);
	TAILQ_FOREACH_SAFE(watch, &vp->v_pollinfo->vpi_inotify, vlink, tmp) {
		KASSERT(watch->vp == vp,
		    ("inotify_log: watch %p vp != vp", watch));
		if ((watch->mask & event) != 0 || event == IN_UNMOUNT) {
			relecount += inotify_log_one(watch, name, namelen,
			    event, cookie);
		}
	}
	mtx_unlock(&vp->v_pollinfo->vpi_lock);

	for (int i = 0; i < relecount; i++)
		vrele(vp);
}

/*
 * An inotify event occurred on a watched vnode.
 */
void
vn_inotify(struct vnode *vp, struct vnode *dvp, struct componentname *cnp,
    int event, uint32_t cookie)
{
	int isdir;

	VNPASS(vp->v_holdcnt > 0, vp);

	isdir = vp->v_type == VDIR ? IN_ISDIR : 0;

	if (dvp != NULL) {
		VNPASS(dvp->v_holdcnt > 0, dvp);

		/*
		 * Should we log an event for the vnode itself?
		 */
		if ((vn_irflag_read(vp) & VIRF_INOTIFY) != 0) {
			int selfevent;

			switch (event) {
			case _IN_MOVE_DELETE:
			case IN_DELETE:
				/*
				 * IN_DELETE_SELF is only generated when the
				 * last hard link of a file is removed.
				 */
				selfevent = IN_DELETE_SELF;
				if (vp->v_type != VDIR) {
					struct vattr va;
					int error;

					error = VOP_GETATTR(vp, &va, cnp->cn_cred);
					if (error == 0 && va.va_nlink != 0)
						selfevent = 0;
				}
				break;
			case IN_MOVED_FROM:
				cookie = 0;
				selfevent = IN_MOVE_SELF;
				break;
			case _IN_ATTRIB_LINKCOUNT:
				selfevent = IN_ATTRIB;
				break;
			default:
				selfevent = event;
				break;
			}

			if ((selfevent & ~_IN_DIR_EVENTS) != 0) {
				inotify_log(vp, NULL, 0, selfevent | isdir,
				    cookie);
			}
		}

		/*
		 * Something is watching the directory through which this vnode
		 * was referenced, so we may need to log the event.
		 */
		if ((event & IN_ALL_EVENTS) != 0 &&
		    (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0) {
			inotify_log(dvp, cnp->cn_nameptr,
			    cnp->cn_namelen, event | isdir, cookie);
		}
	} else {
		/*
		 * We don't know which watched directory might contain the
		 * vnode, so we have to fall back to searching the name cache.
		 */
		cache_vop_inotify(vp, event, cookie);
	}
}

int
vn_inotify_add_watch(struct vnode *vp, struct inotify_softc *sc, uint32_t mask,
    uint32_t *wdp, struct thread *td)
{
	struct inotify_watch *watch, *watch1;
	uint32_t wd;

	/*
	 * If this is a directory, make sure all of its entries are present in
	 * the name cache so that we're able to look them up if an event occurs.
	 * The persistent reference on the directory prevents the outgoing name
	 * cache entries from being reclaimed.
	 */
	if (vp->v_type == VDIR) {
		struct dirent *dp;
		char *buf;
		off_t off;
		size_t buflen, len;
		int eof, error;

		buflen = 128 * sizeof(struct dirent);
		buf = malloc(buflen, M_TEMP, M_WAITOK);

		error = 0;
		len = off = eof = 0;
		for (;;) {
			struct nameidata nd;

			error = vn_dir_next_dirent(vp, td, buf, buflen, &dp,
			    &len, &off, &eof);
			if (error != 0)
				break;
			if (len == 0)
				/* Finished reading. */
				break;
			if (strcmp(dp->d_name, ".") == 0 ||
			    strcmp(dp->d_name, "..") == 0)
				continue;

			/*
			 * namei() consumes a reference on the starting
			 * directory if it's specified as a vnode.
			 */
			vrefact(vp);
			NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE,
			    dp->d_name, vp);
			error = namei(&nd);
			if (error != 0)
				break;
			vn_irflag_set_cond(nd.ni_vp, VIRF_INOTIFY_PARENT);
			vrele(nd.ni_vp);
		}
		free(buf, M_TEMP);
		if (error != 0)
			return (error);
	}

	/*
	 * The vnode referenced in kern_inotify_add_watch() might be different
	 * than this one if nullfs is in the picture.
	 */
	vrefact(vp);
	watch = malloc(sizeof(*watch), M_INOTIFY, M_WAITOK | M_ZERO);
	watch->sc = sc;
	watch->vp = vp;
	watch->mask = mask;

	/*
	 * Are we updating an existing watch?  Search the vnode's list rather
	 * than that of the softc, as the former is likely to be shorter.
	 */
	v_addpollinfo(vp);
	mtx_lock(&vp->v_pollinfo->vpi_lock);
	TAILQ_FOREACH(watch1, &vp->v_pollinfo->vpi_inotify, vlink) {
		if (watch1->sc == sc)
			break;
	}
	mtx_lock(&sc->lock);
	if (watch1 != NULL) {
		mtx_unlock(&vp->v_pollinfo->vpi_lock);

		/*
		 * We found an existing watch, update it based on our flags.
		 */
		if ((mask & IN_MASK_CREATE) != 0) {
			mtx_unlock(&sc->lock);
			vrele(vp);
			free(watch, M_INOTIFY);
			return (EEXIST);
		}
		if ((mask & IN_MASK_ADD) != 0)
			watch1->mask |= mask;
		else
			watch1->mask = mask;
		*wdp = watch1->wd;
		mtx_unlock(&sc->lock);
		vrele(vp);
		free(watch, M_INOTIFY);
		return (EJUSTRETURN);
	}

	/*
	 * We're creating a new watch.  Add it to the softc and vnode watch
	 * lists.
	 */
	do {
		struct inotify_watch key;

		/*
		 * Search for the next available watch descriptor.  This is
		 * implemented so as to avoid reusing watch descriptors for as
		 * long as possible.
		 */
		key.wd = wd = sc->nextwatch++;
		watch1 = RB_FIND(inotify_watch_tree, &sc->watches, &key);
	} while (watch1 != NULL || wd == 0);
	watch->wd = wd;
	RB_INSERT(inotify_watch_tree, &sc->watches, watch);
	TAILQ_INSERT_TAIL(&vp->v_pollinfo->vpi_inotify, watch, vlink);
	mtx_unlock(&sc->lock);
	mtx_unlock(&vp->v_pollinfo->vpi_lock);
	vn_irflag_set_cond(vp, VIRF_INOTIFY);

	*wdp = wd;

	return (0);
}

void
vn_inotify_revoke(struct vnode *vp)
{
	if (vp->v_pollinfo == NULL) {
		/* This is a nullfs vnode which shadows a watched vnode. */
		return;
	}
	inotify_log(vp, NULL, 0, IN_UNMOUNT, 0);
}

static int
fget_inotify(struct thread *td, int fd, const cap_rights_t *needrightsp,
    struct file **fpp)
{
	struct file *fp;
	int error;

	error = fget(td, fd, needrightsp, &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_INOTIFY) {
		fdrop(fp, td);
		return (EINVAL);
	}
	*fpp = fp;
	return (0);
}

int
kern_inotify_add_watch(int fd, int dfd, const char *path, uint32_t mask,
    struct thread *td)
{
	struct nameidata nd;
	struct file *fp;
	struct inotify_softc *sc;
	struct vnode *vp;
	uint32_t wd;
	int count, error;

	fp = NULL;
	vp = NULL;

	if ((mask & IN_ALL_EVENTS) == 0)
		return (EXTERROR(EINVAL, "no events specified"));
	if ((mask & (IN_MASK_ADD | IN_MASK_CREATE)) ==
	    (IN_MASK_ADD | IN_MASK_CREATE))
		return (EXTERROR(EINVAL,
		    "IN_MASK_ADD and IN_MASK_CREATE are mutually exclusive"));
	if ((mask & ~(IN_ALL_EVENTS | _IN_ALL_FLAGS | IN_UNMOUNT)) != 0)
		return (EXTERROR(EINVAL, "unrecognized flag"));

	error = fget_inotify(td, fd, &cap_inotify_add_rights, &fp);
	if (error != 0)
		return (error);
	sc = fp->f_data;

	NDINIT_AT(&nd, LOOKUP,
	    ((mask & IN_DONT_FOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF |
	    LOCKSHARED | AUDITVNODE1, UIO_USERSPACE, path, dfd);
	error = namei(&nd);
	if (error != 0)
		goto out;
	NDFREE_PNBUF(&nd);
	vp = nd.ni_vp;

	error = VOP_ACCESS(vp, VREAD, td->td_ucred, td);
	if (error != 0)
		goto out;

	if ((mask & IN_ONLYDIR) != 0 && vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	count = atomic_fetchadd_int(&inotify_watches, 1);
	if (count > inotify_max_watches) {
		atomic_subtract_int(&inotify_watches, 1);
		error = ENOSPC;
		goto out;
	}
	if (!chginotifywatchcnt(sc->cred->cr_ruidinfo, 1,
	    inotify_max_user_watches)) {
		atomic_subtract_int(&inotify_watches, 1);
		error = ENOSPC;
		goto out;
	}
	error = VOP_INOTIFY_ADD_WATCH(vp, sc, mask, &wd, td);
	if (error != 0) {
		atomic_subtract_int(&inotify_watches, 1);
		(void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0);
		if (error == EJUSTRETURN) {
			/* We updated an existing watch, everything is ok. */
			error = 0;
		} else {
			goto out;
		}
	}
	td->td_retval[0] = wd;

out:
	if (vp != NULL)
		vput(vp);
	fdrop(fp, td);
	return (error);
}

int
sys_inotify_add_watch_at(struct thread *td,
    struct inotify_add_watch_at_args *uap)
{
	return (kern_inotify_add_watch(uap->fd, uap->dfd, uap->path,
	    uap->mask, td));
}

int
kern_inotify_rm_watch(int fd, uint32_t wd, struct thread *td)
{
	struct file *fp;
	struct inotify_softc *sc;
	struct inotify_record *rec;
	struct inotify_watch key, *watch;
	int error;

	error = fget_inotify(td, fd, &cap_inotify_rm_rights, &fp);
	if (error != 0)
		return (error);
	sc = fp->f_data;

	rec = inotify_alloc_record(wd, NULL, 0, IN_IGNORED, 0, M_WAITOK);

	/*
	 * For compatibility with Linux, we do not remove pending events
	 * associated with the watch.  Watch descriptors are implemented so as
	 * to avoid being reused for as long as possible, so one hopes that any
	 * pending events from the removed watch descriptor will be removed
	 * before the watch descriptor is recycled.
	 */
	key.wd = wd;
	mtx_lock(&sc->lock);
	watch = RB_FIND(inotify_watch_tree, &sc->watches, &key);
	if (watch == NULL) {
		free(rec, M_INOTIFY);
		error = EINVAL;
	} else {
		RB_REMOVE(inotify_watch_tree, &sc->watches, watch);
		if (!inotify_queue_record(sc, rec)) {
			free(rec, M_INOTIFY);
			error = 0;
		}
	}
	mtx_unlock(&sc->lock);
	if (watch != NULL)
		inotify_remove_watch(watch);
	fdrop(fp, td);
	return (error);
}

int
sys_inotify_rm_watch(struct thread *td, struct inotify_rm_watch_args *uap)
{
	return (kern_inotify_rm_watch(uap->fd, uap->wd, td));
}
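
/*
 * Illustrative sketch (not compiled as part of this file): how the interface
 * implemented above is typically consumed from userspace, assuming the
 * inotify_init1(), inotify_add_watch(), and inotify_rm_watch() wrappers
 * declared in <sys/inotify.h>.  The read buffer is sized to hold at least one
 * maximal event, which is what the "read buffer is too small" check in
 * inotify_read() above requires.
 *
 *	#include <sys/inotify.h>
 *	#include <limits.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	char buf[sizeof(struct inotify_event) + NAME_MAX + 1];
 *	ssize_t n;
 *	int fd, wd;
 *
 *	fd = inotify_init1(0);
 *	wd = inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE);
 *	while ((n = read(fd, buf, sizeof(buf))) > 0) {
 *		char *p = buf;
 *
 *		while (p < buf + n) {
 *			struct inotify_event *ev = (struct inotify_event *)p;
 *
 *			printf("wd %d mask %#x name %s\n", ev->wd, ev->mask,
 *			    ev->len > 0 ? ev->name : "");
 *			p += sizeof(*ev) + ev->len;
 *		}
 *	}
 *	(void)inotify_rm_watch(fd, wd);
 *	(void)close(fd);
 */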