// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 */

/*
 * fsnotify inode mark locking, lifetime, and refcounting
 *
 * REFCNT:
 * The group->refcnt and mark->refcnt tell how many "things" in the kernel
 * currently are referencing the objects. Both kinds of objects typically will
 * live inside the kernel with a refcnt of 2, one for its creation and one for
 * the reference a group and a mark hold to each other.
 * If you are holding the appropriate locks, you can take a reference and the
 * object itself is guaranteed to survive until the reference is dropped.
 *
 * LOCKING:
 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
 * in order as follows:
 *
 * group->mark_mutex
 * mark->lock
 * mark->connector->lock
 *
 * group->mark_mutex protects the marks_list anchored inside a given group and
 * each mark is hooked via the g_list. It also protects the group's private
 * data (i.e. group limits).
 *
 * mark->lock protects the mark's attributes like its masks and flags.
 * Furthermore it protects the access to a reference of the group that the mark
 * is assigned to as well as the access to a reference of the inode/vfsmount
 * that is being watched by the mark.
 *
 * mark->connector->lock protects the list of marks anchored inside an
 * inode / vfsmount and each mark is hooked via the i_list.
 *
 * A list of notification marks relating to inode / mnt is contained in
 * fsnotify_mark_connector. That structure is alive as long as there are any
 * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets
 * detached from fsnotify_mark_connector when the last reference to the mark
 * is dropped. Thus holding a mark reference is enough to protect the
 * mark->connector pointer and to make sure fsnotify_mark_connector cannot
 * disappear. Also, because we remove a mark from the g_list before dropping
 * the mark reference associated with that, any mark found through g_list is
 * guaranteed to have mark->connector set until we drop group->mark_mutex.
 *
 * LIFETIME:
 * Inode marks survive between when they are added to an inode and when their
 * refcnt==0. Marks are also protected by fsnotify_mark_srcu.
 *
 * The inode mark can be cleared for a number of different reasons including:
 * - The inode is unlinked for the last time. (fsnotify_inode_remove)
 * - The inode is being evicted from cache. (fsnotify_inode_delete)
 * - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes)
 * - Something explicitly requests that it be removed. (fsnotify_destroy_mark)
 * - The fsnotify_group associated with the mark is going away and all such marks
 *   need to be cleaned up. (fsnotify_clear_marks_by_group)
 *
 * This has the very interesting property that any of these removal paths can
 * run concurrently with any (or all) of the others.
 */
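
/*
 * For example (an illustrative sketch, not a real caller in this file),
 * code that needs all three locks for an attached mark nests them in
 * exactly the order listed above:
 *
 *	fsnotify_group_lock(group);		<- group->mark_mutex
 *	spin_lock(&mark->lock);
 *	spin_lock(&mark->connector->lock);
 *	...
 *	spin_unlock(&mark->connector->lock);
 *	spin_unlock(&mark->lock);
 *	fsnotify_group_unlock(group);
 *
 * fsnotify_add_mark_locked() below follows this ordering for real.
 */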

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
#include <linux/ratelimit.h>

#include <linux/atomic.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

#define FSNOTIFY_REAPER_DELAY	(1)	/* 1 jiffy */

struct srcu_struct fsnotify_mark_srcu;
struct kmem_cache *fsnotify_mark_connector_cachep;

static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
static struct fsnotify_mark_connector *connector_destroy_list;

static void fsnotify_mark_destroy_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn);

static void fsnotify_connector_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn);

void fsnotify_get_mark(struct fsnotify_mark *mark)
{
	WARN_ON_ONCE(!refcount_read(&mark->refcnt));
	refcount_inc(&mark->refcnt);
}

static fsnotify_connp_t *fsnotify_object_connp(void *obj,
					       enum fsnotify_obj_type obj_type)
{
	switch (obj_type) {
	case FSNOTIFY_OBJ_TYPE_INODE:
		return &((struct inode *)obj)->i_fsnotify_marks;
	case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
		return &real_mount(obj)->mnt_fsnotify_marks;
	case FSNOTIFY_OBJ_TYPE_SB:
		return fsnotify_sb_marks(obj);
	default:
		return NULL;
	}
}

static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
{
	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
		return &fsnotify_conn_inode(conn)->i_fsnotify_mask;
	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
		return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
	else if (conn->type == FSNOTIFY_OBJ_TYPE_SB)
		return &fsnotify_conn_sb(conn)->s_fsnotify_mask;
	return NULL;
}

__u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn)
{
	if (WARN_ON(!fsnotify_valid_obj_type(conn->type)))
		return 0;

	return READ_ONCE(*fsnotify_conn_mask_p(conn));
}

static void fsnotify_get_sb_watched_objects(struct super_block *sb)
{
	atomic_long_inc(fsnotify_sb_watched_objects(sb));
}

static void fsnotify_put_sb_watched_objects(struct super_block *sb)
{
	if (atomic_long_dec_and_test(fsnotify_sb_watched_objects(sb)))
		wake_up_var(fsnotify_sb_watched_objects(sb));
}

static void fsnotify_get_inode_ref(struct inode *inode)
{
	ihold(inode);
	fsnotify_get_sb_watched_objects(inode->i_sb);
}

static void fsnotify_put_inode_ref(struct inode *inode)
{
	fsnotify_put_sb_watched_objects(inode->i_sb);
	iput(inode);
}
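
/*
 * A minimal usage sketch of the two helpers above (illustrative only;
 * nothing in this file calls it): every fsnotify_get_inode_ref() must be
 * balanced by fsnotify_put_inode_ref(), which also keeps the per-sb
 * watched-objects count in sync so that unmount can wait for it to drain.
 */
static void __maybe_unused fsnotify_inode_ref_pairing_sketch(struct inode *inode)
{
	/* Pins the inode and bumps the sb watched-objects count */
	fsnotify_get_inode_ref(inode);
	/* ... the inode cannot be evicted in this window ... */
	/* Drops the watched-objects count, then iput()s the inode */
	fsnotify_put_inode_ref(inode);
}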

/*
 * Grab or drop watched objects reference depending on whether the connector
 * is attached and has any marks attached.
 */
static void fsnotify_update_sb_watchers(struct super_block *sb,
					struct fsnotify_mark_connector *conn)
{
	struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
	bool is_watched = conn->flags & FSNOTIFY_CONN_FLAG_IS_WATCHED;
	struct fsnotify_mark *first_mark = NULL;
	unsigned int highest_prio = 0;

	if (conn->obj)
		first_mark = hlist_entry_safe(conn->list.first,
					      struct fsnotify_mark, obj_list);
	if (first_mark)
		highest_prio = first_mark->group->priority;
	if (WARN_ON(highest_prio >= __FSNOTIFY_PRIO_NUM))
		highest_prio = 0;

	/*
	 * If the highest priority of the groups watching this object is prio,
	 * then the watched object has a reference on counters [0..prio].
	 * Update priority >= 1 watched objects counters.
	 */
	for (unsigned int p = conn->prio + 1; p <= highest_prio; p++)
		atomic_long_inc(&sbinfo->watched_objects[p]);
	for (unsigned int p = conn->prio; p > highest_prio; p--)
		atomic_long_dec(&sbinfo->watched_objects[p]);
	conn->prio = highest_prio;

	/* Update priority >= 0 (a.k.a. total) watched objects counter */
	BUILD_BUG_ON(FSNOTIFY_PRIO_NORMAL != 0);
	if (first_mark && !is_watched) {
		conn->flags |= FSNOTIFY_CONN_FLAG_IS_WATCHED;
		fsnotify_get_sb_watched_objects(sb);
	} else if (!first_mark && is_watched) {
		conn->flags &= ~FSNOTIFY_CONN_FLAG_IS_WATCHED;
		fsnotify_put_sb_watched_objects(sb);
	}
}

/*
 * Grab or drop inode reference for the connector if needed.
 *
 * When it's time to drop the reference, we only clear the HAS_IREF flag and
 * return the inode object. fsnotify_drop_object() will be responsible for
 * doing iput() outside of spinlocks. This happens when the last mark that
 * wanted iref is detached.
 */
static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn,
					  bool want_iref)
{
	bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF;
	struct inode *inode = NULL;

	if (conn->type != FSNOTIFY_OBJ_TYPE_INODE ||
	    want_iref == has_iref)
		return NULL;

	if (want_iref) {
		/* Pin inode if any mark wants inode refcount held */
		fsnotify_get_inode_ref(fsnotify_conn_inode(conn));
		conn->flags |= FSNOTIFY_CONN_FLAG_HAS_IREF;
	} else {
		/* Unpin inode after detach of last mark that wanted iref */
		inode = fsnotify_conn_inode(conn);
		conn->flags &= ~FSNOTIFY_CONN_FLAG_HAS_IREF;
	}

	return inode;
}
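
/*
 * Worked example for __fsnotify_recalc_mask() below (illustrative values):
 * with two attached inode marks whose effective masks are FS_OPEN and
 * FS_MODIFY | FS_CLOSE_WRITE, the connector's object mask becomes
 * FS_OPEN | FS_MODIFY | FS_CLOSE_WRITE, and the inode stays pinned as long
 * as at least one of the marks does not have FSNOTIFY_MARK_FLAG_NO_IREF set.
 */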

static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
	u32 new_mask = 0;
	bool want_iref = false;
	struct fsnotify_mark *mark;

	assert_spin_locked(&conn->lock);
	/* We can get detached connector here when inode is getting unlinked. */
	if (!fsnotify_valid_obj_type(conn->type))
		return NULL;
	hlist_for_each_entry(mark, &conn->list, obj_list) {
		if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
			continue;
		new_mask |= fsnotify_calc_mask(mark);
		if (conn->type == FSNOTIFY_OBJ_TYPE_INODE &&
		    !(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
			want_iref = true;
	}
	/*
	 * We use WRITE_ONCE() to prevent silly compiler optimizations from
	 * confusing readers not holding conn->lock with partial updates.
	 */
	WRITE_ONCE(*fsnotify_conn_mask_p(conn), new_mask);

	return fsnotify_update_iref(conn, want_iref);
}

static bool fsnotify_conn_watches_children(
					struct fsnotify_mark_connector *conn)
{
	if (conn->type != FSNOTIFY_OBJ_TYPE_INODE)
		return false;

	return fsnotify_inode_watches_children(fsnotify_conn_inode(conn));
}

static void fsnotify_conn_set_children_dentry_flags(
					struct fsnotify_mark_connector *conn)
{
	if (conn->type != FSNOTIFY_OBJ_TYPE_INODE)
		return;

	fsnotify_set_children_dentry_flags(fsnotify_conn_inode(conn));
}

/*
 * Calculate mask of events for a list of marks. The caller must make sure
 * connector and connector->obj cannot disappear under us. Callers achieve
 * this by holding a mark->lock or mark->group->mark_mutex for a mark on this
 * list.
 */
void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
	bool update_children;

	if (!conn)
		return;

	spin_lock(&conn->lock);
	update_children = !fsnotify_conn_watches_children(conn);
	__fsnotify_recalc_mask(conn);
	update_children &= fsnotify_conn_watches_children(conn);
	spin_unlock(&conn->lock);
	/*
	 * Set children's PARENT_WATCHED flags only if parent started watching.
	 * When parent stops watching, we clear false positive PARENT_WATCHED
	 * flags lazily in __fsnotify_parent().
	 */
	if (update_children)
		fsnotify_conn_set_children_dentry_flags(conn);
}

/* Free all connectors queued for freeing once SRCU period ends */
static void fsnotify_connector_destroy_workfn(struct work_struct *work)
{
	struct fsnotify_mark_connector *conn, *free;

	spin_lock(&destroy_lock);
	conn = connector_destroy_list;
	connector_destroy_list = NULL;
	spin_unlock(&destroy_lock);

	synchronize_srcu(&fsnotify_mark_srcu);
	while (conn) {
		free = conn;
		conn = conn->destroy_next;
		kmem_cache_free(fsnotify_mark_connector_cachep, free);
	}
}

static void *fsnotify_detach_connector_from_object(
					struct fsnotify_mark_connector *conn,
					unsigned int *type)
{
	fsnotify_connp_t *connp = fsnotify_object_connp(conn->obj, conn->type);
	struct super_block *sb = fsnotify_connector_sb(conn);
	struct inode *inode = NULL;

	*type = conn->type;
	if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED)
		return NULL;

	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
		inode = fsnotify_conn_inode(conn);
		inode->i_fsnotify_mask = 0;

		/* Unpin inode when detaching from connector */
		if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF))
			inode = NULL;
	} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
	} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
		fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
	}

	rcu_assign_pointer(*connp, NULL);
	conn->obj = NULL;
	conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
	fsnotify_update_sb_watchers(sb, conn);

	return inode;
}

static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
{
	struct fsnotify_group *group = mark->group;

	if (WARN_ON_ONCE(!group))
		return;
	group->ops->free_mark(mark);
	fsnotify_put_group(group);
}
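
/*
 * Teardown overview (descriptive; both paths are driven from
 * fsnotify_put_mark() below): a connector whose mark list became empty is
 * detached from its object, queued on connector_destroy_list and freed by
 * fsnotify_connector_destroy_workfn() after an SRCU grace period; the mark
 * itself is queued on destroy_list and reaches fsnotify_final_mark_destroy()
 * via fsnotify_mark_destroy_workfn().
 */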

/* Drop object reference originally held by a connector */
static void fsnotify_drop_object(unsigned int type, void *objp)
{
	if (!objp)
		return;
	/* Currently only inode references are passed to be dropped */
	if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
		return;
	fsnotify_put_inode_ref(objp);
}

void fsnotify_put_mark(struct fsnotify_mark *mark)
{
	struct fsnotify_mark_connector *conn = READ_ONCE(mark->connector);
	void *objp = NULL;
	unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED;
	bool free_conn = false;

	/* Catch marks that were actually never attached to object */
	if (!conn) {
		if (refcount_dec_and_test(&mark->refcnt))
			fsnotify_final_mark_destroy(mark);
		return;
	}

	/*
	 * We have to be careful so that traversals of obj_list under lock can
	 * safely grab mark reference.
	 */
	if (!refcount_dec_and_lock(&mark->refcnt, &conn->lock))
		return;

	hlist_del_init_rcu(&mark->obj_list);
	if (hlist_empty(&conn->list)) {
		objp = fsnotify_detach_connector_from_object(conn, &type);
		free_conn = true;
	} else {
		struct super_block *sb = fsnotify_connector_sb(conn);

		/* Update watched objects after detaching mark */
		if (sb)
			fsnotify_update_sb_watchers(sb, conn);
		objp = __fsnotify_recalc_mask(conn);
		type = conn->type;
	}
	WRITE_ONCE(mark->connector, NULL);
	spin_unlock(&conn->lock);

	fsnotify_drop_object(type, objp);

	if (free_conn) {
		spin_lock(&destroy_lock);
		conn->destroy_next = connector_destroy_list;
		connector_destroy_list = conn;
		spin_unlock(&destroy_lock);
		queue_work(system_unbound_wq, &connector_reaper_work);
	}
	/*
	 * Note that we didn't update flags telling whether inode cares about
	 * what's happening with children. We update these flags from
	 * __fsnotify_parent() lazily when next event happens on one of our
	 * children.
	 */
	spin_lock(&destroy_lock);
	list_add(&mark->g_list, &destroy_list);
	spin_unlock(&destroy_lock);
	queue_delayed_work(system_unbound_wq, &reaper_work,
			   FSNOTIFY_REAPER_DELAY);
}
EXPORT_SYMBOL_GPL(fsnotify_put_mark);
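
/*
 * Illustrative sketch of the invariant fsnotify_put_mark() maintains
 * (hypothetical helper, not used in this file): because the last mark
 * reference is always dropped under conn->lock, a traversal holding
 * conn->lock may safely grab a reference to any mark it finds on the list.
 */
static struct fsnotify_mark *__maybe_unused
fsnotify_first_mark_sketch(struct fsnotify_mark_connector *conn)
{
	struct fsnotify_mark *mark;

	spin_lock(&conn->lock);
	mark = hlist_entry_safe(conn->list.first, struct fsnotify_mark,
				obj_list);
	if (mark)
		fsnotify_get_mark(mark);
	spin_unlock(&conn->lock);

	return mark;	/* caller must fsnotify_put_mark() the result */
}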

/*
 * Get mark reference when we found the mark via lockless traversal of object
 * list. Mark can be already removed from the list by now and on its way to be
 * destroyed once SRCU period ends.
 *
 * Also pin the group so it doesn't disappear under us.
 */
static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
{
	if (!mark)
		return true;

	if (refcount_inc_not_zero(&mark->refcnt)) {
		spin_lock(&mark->lock);
		if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
			/* mark is attached, group is still alive then */
			atomic_inc(&mark->group->user_waits);
			spin_unlock(&mark->lock);
			return true;
		}
		spin_unlock(&mark->lock);
		fsnotify_put_mark(mark);
	}
	return false;
}

/*
 * Puts marks and wakes up group destruction if necessary.
 *
 * Pairs with fsnotify_get_mark_safe()
 */
static void fsnotify_put_mark_wake(struct fsnotify_mark *mark)
{
	if (mark) {
		struct fsnotify_group *group = mark->group;

		fsnotify_put_mark(mark);
		/*
		 * We abuse notification_waitq on group shutdown for waiting for
		 * all marks pinned when waiting for userspace.
		 */
		if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
			wake_up(&group->notification_waitq);
	}
}

bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
	__releases(&fsnotify_mark_srcu)
{
	int type;

	fsnotify_foreach_iter_type(type) {
		/* This can fail if mark is being removed */
		if (!fsnotify_get_mark_safe(iter_info->marks[type])) {
			__release(&fsnotify_mark_srcu);
			goto fail;
		}
	}

	/*
	 * Now that both marks are pinned by refcount in the inode / vfsmount
	 * lists, we can drop SRCU lock, and safely resume the list iteration
	 * once userspace returns.
	 */
	srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);

	return true;

fail:
	for (type--; type >= 0; type--)
		fsnotify_put_mark_wake(iter_info->marks[type]);
	return false;
}

void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
	__acquires(&fsnotify_mark_srcu)
{
	int type;

	iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
	fsnotify_foreach_iter_type(type)
		fsnotify_put_mark_wake(iter_info->marks[type]);
}
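
/*
 * Usage sketch for the pair above (illustrative; fanotify permission
 * events do something like this for real, and "wait_for_response()" is a
 * hypothetical stand-in for the backend's blocking call):
 *
 *	if (!fsnotify_prepare_user_wait(iter_info))
 *		goto bail;			<- a mark is being removed
 *	ret = wait_for_response(group, event);	<- SRCU is not held here
 *	fsnotify_finish_user_wait(iter_info);	<- SRCU held again
 */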

/*
 * Mark the mark as detached and remove it from the group list. The mark still
 * stays in the object list until its last reference is dropped. Note that we
 * rely on the mark being removed from the group list before the corresponding
 * reference to it is dropped. In particular we rely on mark->connector being
 * valid while we hold group->mark_mutex if we found the mark through g_list.
 *
 * Must be called with group->mark_mutex held. The caller must either hold
 * reference to the mark or be protected by fsnotify_mark_srcu.
 */
void fsnotify_detach_mark(struct fsnotify_mark *mark)
{
	fsnotify_group_assert_locked(mark->group);
	WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
		     refcount_read(&mark->refcnt) < 1 +
			!!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));

	spin_lock(&mark->lock);
	/* something else already called this function on this mark */
	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		spin_unlock(&mark->lock);
		return;
	}
	mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
	list_del_init(&mark->g_list);
	spin_unlock(&mark->lock);

	/* Drop mark reference acquired in fsnotify_add_mark_locked() */
	fsnotify_put_mark(mark);
}

/*
 * Free fsnotify mark. The mark is actually only marked as being freed. The
 * freeing actually happens only once the last reference to the mark is
 * dropped, from a workqueue which first waits for the SRCU period to end.
 *
 * Caller must have a reference to the mark or be protected by
 * fsnotify_mark_srcu.
 */
void fsnotify_free_mark(struct fsnotify_mark *mark)
{
	struct fsnotify_group *group = mark->group;

	spin_lock(&mark->lock);
	/* something else already called this function on this mark */
	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
		spin_unlock(&mark->lock);
		return;
	}
	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
	spin_unlock(&mark->lock);

	/*
	 * Some groups like to know that marks are being freed. This is a
	 * callback to the group function to let it know that this mark
	 * is being freed.
	 */
	if (group->ops->freeing_mark)
		group->ops->freeing_mark(mark, group);
}

void fsnotify_destroy_mark(struct fsnotify_mark *mark,
			   struct fsnotify_group *group)
{
	fsnotify_group_lock(group);
	fsnotify_detach_mark(mark);
	fsnotify_group_unlock(group);
	fsnotify_free_mark(mark);
}
EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);

/*
 * Sorting function for lists of fsnotify marks.
 *
 * Fanotify supports different notification classes (reflected as priority of
 * notification group). Events shall be passed to notification groups in
 * decreasing priority order. To achieve this marks in notification lists for
 * inodes and vfsmounts are sorted so that priorities of corresponding groups
 * are descending.
 *
 * Furthermore correct handling of the ignore mask requires processing inode
 * and vfsmount marks of each group together. Using the group address as
 * further sort criterion provides a unique sorting order and thus we can
 * merge inode and vfsmount lists of marks in linear time and find groups
 * present in both lists.
 *
 * A return value of 1 signifies that b has priority over a.
 * A return value of 0 signifies that the two marks have to be handled together.
 * A return value of -1 signifies that a has priority over b.
 */
int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
{
	if (a == b)
		return 0;
	if (!a)
		return 1;
	if (!b)
		return -1;
	if (a->priority < b->priority)
		return 1;
	if (a->priority > b->priority)
		return -1;
	if (a < b)
		return 1;
	return -1;
}

static int fsnotify_attach_info_to_sb(struct super_block *sb)
{
	struct fsnotify_sb_info *sbinfo;

	/* sb info is freed on fsnotify_sb_delete() */
	sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
	if (!sbinfo)
		return -ENOMEM;

	/*
	 * cmpxchg() provides the barrier so that callers of fsnotify_sb_info()
	 * will observe an initialized structure
	 */
	if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) {
		/* Someone else created sbinfo for us */
		kfree(sbinfo);
	}
	return 0;
}

static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
					       void *obj, unsigned int obj_type)
{
	struct fsnotify_mark_connector *conn;

	conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
	if (!conn)
		return -ENOMEM;
	spin_lock_init(&conn->lock);
	INIT_HLIST_HEAD(&conn->list);
	conn->flags = 0;
	conn->prio = 0;
	conn->type = obj_type;
	conn->obj = obj;

	/*
	 * cmpxchg() provides the barrier so that readers of *connp can see
	 * only initialized structure
	 */
	if (cmpxchg(connp, NULL, conn)) {
		/* Someone else created list structure for us */
		kmem_cache_free(fsnotify_mark_connector_cachep, conn);
	}
	return 0;
}
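
/*
 * Both attach helpers above use the same lock-free publishing idiom:
 * fully initialize the new object, then cmpxchg() it into a pointer that
 * is expected to be NULL. A generic sketch of the idiom (hypothetical
 * @slot and helpers):
 *
 *	new = alloc_and_init(...);
 *	if (cmpxchg(&slot, NULL, new))
 *		free(new);	<- lost the race; someone else published one
 *
 * Either way the slot holds a fully initialized object afterwards, which
 * is why readers may dereference it without taking a lock.
 */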

/*
 * Get mark connector, make sure it is alive and return with its lock held.
 * This is for users that get connector pointer from inode or mount. Users that
 * hold reference to a mark on the list may directly lock connector->lock as
 * they are sure list cannot go away under them.
 */
static struct fsnotify_mark_connector *fsnotify_grab_connector(
						fsnotify_connp_t *connp)
{
	struct fsnotify_mark_connector *conn;
	int idx;

	idx = srcu_read_lock(&fsnotify_mark_srcu);
	conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
	if (!conn)
		goto out;
	spin_lock(&conn->lock);
	if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) {
		spin_unlock(&conn->lock);
		srcu_read_unlock(&fsnotify_mark_srcu, idx);
		return NULL;
	}
out:
	srcu_read_unlock(&fsnotify_mark_srcu, idx);
	return conn;
}

/*
 * Add mark into proper place in given list of marks. These marks may be used
 * for the fsnotify backend to determine which event types should be delivered
 * to which group and for which inodes. These marks are ordered according to
 * priority, highest number first, and then by the group's location in memory.
 */
static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj,
				  unsigned int obj_type, int add_flags)
{
	struct super_block *sb = fsnotify_object_sb(obj, obj_type);
	struct fsnotify_mark *lmark, *last = NULL;
	struct fsnotify_mark_connector *conn;
	fsnotify_connp_t *connp;
	int cmp;
	int err = 0;

	if (WARN_ON(!fsnotify_valid_obj_type(obj_type)))
		return -EINVAL;

	/*
	 * Attach the sb info before attaching a connector to any object on sb.
	 * The sb info will remain attached as long as sb lives.
	 */
	if (!fsnotify_sb_info(sb)) {
		err = fsnotify_attach_info_to_sb(sb);
		if (err)
			return err;
	}

	connp = fsnotify_object_connp(obj, obj_type);
restart:
	spin_lock(&mark->lock);
	conn = fsnotify_grab_connector(connp);
	if (!conn) {
		spin_unlock(&mark->lock);
		err = fsnotify_attach_connector_to_object(connp, obj, obj_type);
		if (err)
			return err;
		goto restart;
	}

	/* is mark the first mark? */
	if (hlist_empty(&conn->list)) {
		hlist_add_head_rcu(&mark->obj_list, &conn->list);
		goto added;
	}

	/* should mark be in the middle of the current list? */
	hlist_for_each_entry(lmark, &conn->list, obj_list) {
		last = lmark;

		if ((lmark->group == mark->group) &&
		    (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
		    !(mark->group->flags & FSNOTIFY_GROUP_DUPS)) {
			err = -EEXIST;
			goto out_err;
		}

		cmp = fsnotify_compare_groups(lmark->group, mark->group);
		if (cmp >= 0) {
			hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list);
			goto added;
		}
	}

	BUG_ON(last == NULL);
	/* mark should be the last entry. last is the current last entry */
	hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
	fsnotify_update_sb_watchers(sb, conn);
	/*
	 * Since connector is attached to object using cmpxchg() we are
	 * guaranteed that connector initialization is fully visible by anyone
	 * seeing mark->connector set.
	 */
	WRITE_ONCE(mark->connector, conn);
out_err:
	spin_unlock(&conn->lock);
	spin_unlock(&mark->lock);
	return err;
}
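
/*
 * Resulting order, by example (illustrative priorities): marks whose
 * groups have priorities 2, 1, 1, 0 sit on the object list in exactly
 * that order, and the two priority-1 marks are ordered by their group
 * addresses, so inode and vfsmount lists of the same groups can be
 * merge-iterated in a single linear pass.
 */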

/*
 * Attach an initialized mark to a given group and fs object.
 * These marks may be used for the fsnotify backend to determine which
 * event types should be delivered to which group.
 */
int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
			     void *obj, unsigned int obj_type,
			     int add_flags)
{
	struct fsnotify_group *group = mark->group;
	int ret = 0;

	fsnotify_group_assert_locked(group);

	/*
	 * LOCKING ORDER!!!!
	 * group->mark_mutex
	 * mark->lock
	 * mark->connector->lock
	 */
	spin_lock(&mark->lock);
	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;

	list_add(&mark->g_list, &group->marks_list);
	fsnotify_get_mark(mark); /* for g_list */
	spin_unlock(&mark->lock);

	ret = fsnotify_add_mark_list(mark, obj, obj_type, add_flags);
	if (ret)
		goto err;

	fsnotify_recalc_mask(mark->connector);

	return ret;
err:
	spin_lock(&mark->lock);
	mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
			 FSNOTIFY_MARK_FLAG_ATTACHED);
	list_del_init(&mark->g_list);
	spin_unlock(&mark->lock);

	fsnotify_put_mark(mark);
	return ret;
}

int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj,
		      unsigned int obj_type, int add_flags)
{
	int ret;
	struct fsnotify_group *group = mark->group;

	fsnotify_group_lock(group);
	ret = fsnotify_add_mark_locked(mark, obj, obj_type, add_flags);
	fsnotify_group_unlock(group);
	return ret;
}
EXPORT_SYMBOL_GPL(fsnotify_add_mark);

/*
 * Given a list of marks, find the mark associated with given group. If found
 * take a reference to that mark and return it, else return NULL.
 */
struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type,
					 struct fsnotify_group *group)
{
	fsnotify_connp_t *connp = fsnotify_object_connp(obj, obj_type);
	struct fsnotify_mark_connector *conn;
	struct fsnotify_mark *mark;

	if (!connp)
		return NULL;

	conn = fsnotify_grab_connector(connp);
	if (!conn)
		return NULL;

	hlist_for_each_entry(mark, &conn->list, obj_list) {
		if (mark->group == group &&
		    (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
			fsnotify_get_mark(mark);
			spin_unlock(&conn->lock);
			return mark;
		}
	}
	spin_unlock(&conn->lock);
	return NULL;
}
EXPORT_SYMBOL_GPL(fsnotify_find_mark);
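
/*
 * A minimal usage sketch for fsnotify_find_mark() (illustrative only;
 * backends like inotify/fanotify do the equivalent): look up our group's
 * mark on an inode, extend its mask, recalculate the object mask and drop
 * the reference that fsnotify_find_mark() took for us.
 */
static int __maybe_unused fsnotify_extend_mark_sketch(struct inode *inode,
						      struct fsnotify_group *group)
{
	struct fsnotify_mark *mark;

	fsnotify_group_lock(group);
	mark = fsnotify_find_mark(inode, FSNOTIFY_OBJ_TYPE_INODE, group);
	if (!mark) {
		fsnotify_group_unlock(group);
		return -ENOENT;
	}

	spin_lock(&mark->lock);
	mark->mask |= FS_MODIFY;	/* example event bit */
	spin_unlock(&mark->lock);
	fsnotify_recalc_mask(mark->connector);
	fsnotify_group_unlock(group);

	fsnotify_put_mark(mark);
	return 0;
}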

/* Clear any marks in a group with given type mask */
void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
				   unsigned int obj_type)
{
	struct fsnotify_mark *lmark, *mark;
	LIST_HEAD(to_free);
	struct list_head *head = &to_free;

	/* Skip selection step if we want to clear all marks. */
	if (obj_type == FSNOTIFY_OBJ_TYPE_ANY) {
		head = &group->marks_list;
		goto clear;
	}
	/*
	 * We have to be really careful here. Anytime we drop mark_mutex, e.g.
	 * fsnotify_clear_marks_by_inode() can come and free marks, even ones
	 * on our to_free list, so we have to hold mark_mutex even when
	 * accessing that list. And freeing a mark requires us to drop
	 * mark_mutex. So we can reliably free only the first mark in the
	 * list. That's why we first move the marks to be freed to the
	 * to_free list in one go and then free the marks in the to_free list
	 * one by one.
	 */
	fsnotify_group_lock(group);
	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
		if (mark->connector->type == obj_type)
			list_move(&mark->g_list, &to_free);
	}
	fsnotify_group_unlock(group);

clear:
	while (1) {
		fsnotify_group_lock(group);
		if (list_empty(head)) {
			fsnotify_group_unlock(group);
			break;
		}
		mark = list_first_entry(head, struct fsnotify_mark, g_list);
		fsnotify_get_mark(mark);
		fsnotify_detach_mark(mark);
		fsnotify_group_unlock(group);
		fsnotify_free_mark(mark);
		fsnotify_put_mark(mark);
	}
}

/* Destroy all marks attached to an object via connector */
void fsnotify_destroy_marks(fsnotify_connp_t *connp)
{
	struct fsnotify_mark_connector *conn;
	struct fsnotify_mark *mark, *old_mark = NULL;
	void *objp;
	unsigned int type;

	conn = fsnotify_grab_connector(connp);
	if (!conn)
		return;
	/*
	 * We have to be careful since we can race with e.g.
	 * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the
	 * list can get modified. However we are holding mark reference and
	 * thus our mark cannot be removed from obj_list so we can continue
	 * iteration after regaining conn->lock.
	 */
	hlist_for_each_entry(mark, &conn->list, obj_list) {
		fsnotify_get_mark(mark);
		spin_unlock(&conn->lock);
		if (old_mark)
			fsnotify_put_mark(old_mark);
		old_mark = mark;
		fsnotify_destroy_mark(mark, mark->group);
		spin_lock(&conn->lock);
	}
	/*
	 * Detach list from object now so that we don't pin inode until all
	 * mark references get dropped. It would lead to strange results such
	 * as delaying inode deletion or blocking unmount.
	 */
	objp = fsnotify_detach_connector_from_object(conn, &type);
	spin_unlock(&conn->lock);
	if (old_mark)
		fsnotify_put_mark(old_mark);
	fsnotify_drop_object(type, objp);
}

/*
 * Nothing fancy, just initialize lists and locks and counters.
 */
void fsnotify_init_mark(struct fsnotify_mark *mark,
			struct fsnotify_group *group)
{
	memset(mark, 0, sizeof(*mark));
	spin_lock_init(&mark->lock);
	refcount_set(&mark->refcnt, 1);
	fsnotify_get_group(group);
	mark->group = group;
	WRITE_ONCE(mark->connector, NULL);
}
EXPORT_SYMBOL_GPL(fsnotify_init_mark);

/*
 * Destroy all marks in destroy_list, waits for SRCU period to finish before
 * actually freeing marks.
 */
static void fsnotify_mark_destroy_workfn(struct work_struct *work)
{
	struct fsnotify_mark *mark, *next;
	struct list_head private_destroy_list;

	spin_lock(&destroy_lock);
	/* exchange the list head */
	list_replace_init(&destroy_list, &private_destroy_list);
	spin_unlock(&destroy_lock);

	synchronize_srcu(&fsnotify_mark_srcu);

	list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
		list_del_init(&mark->g_list);
		fsnotify_final_mark_destroy(mark);
	}
}

/* Wait for all marks queued for destruction to be actually destroyed */
void fsnotify_wait_marks_destroyed(void)
{
	flush_delayed_work(&reaper_work);
}
EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);
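
/*
 * End-to-end lifecycle sketch (illustrative only; real backends allocate
 * marks from their own kmem_cache and free them in group->ops->free_mark,
 * so this assumes a group whose free_mark callback simply kfree()s the
 * mark): initialize a mark, attach it to an inode, then tear it down.
 */
static int __maybe_unused fsnotify_mark_lifecycle_sketch(struct fsnotify_group *group,
							 struct inode *inode)
{
	struct fsnotify_mark *mark;
	int ret;

	mark = kzalloc(sizeof(*mark), GFP_KERNEL);
	if (!mark)
		return -ENOMEM;

	fsnotify_init_mark(mark, group);	/* refcnt == 1, pins group */
	mark->mask = FS_MODIFY | FS_CLOSE_WRITE;

	ret = fsnotify_add_mark(mark, inode, FSNOTIFY_OBJ_TYPE_INODE, 0);
	if (ret) {
		/* Drops the last reference; group->ops->free_mark() frees */
		fsnotify_put_mark(mark);
		return ret;
	}

	/* ... events on @inode are now delivered to @group ... */

	fsnotify_destroy_mark(mark, group);	/* detach from inode + group */
	fsnotify_put_mark(mark);		/* drop our creation reference */
	return 0;
}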