/*
 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * fsnotify inode mark locking/lifetime/refcounting
 *
 * REFCNT:
 * The group->refcnt and mark->refcnt tell how many "things" in the kernel
 * currently are referencing the objects. Both kinds of objects typically will
 * live inside the kernel with a refcnt of 2, one for their creation and one
 * for the reference a group and a mark hold to each other.
 * If you are holding the appropriate locks, you can take a reference and the
 * object itself is guaranteed to survive until the reference is dropped.
 *
 * LOCKING:
 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
 * in order as follows:
 *
 * group->mark_mutex
 * mark->lock
 * mark->connector->lock
 *
 * group->mark_mutex protects the marks_list anchored inside a given group and
 * each mark is hooked via the g_list. It also protects the group's private
 * data (i.e. group limits).
 *
 * mark->lock protects the mark's attributes like its masks and flags.
 * Furthermore it protects the access to a reference of the group that the mark
 * is assigned to as well as the access to a reference of the inode/vfsmount
 * that is being watched by the mark.
 *
 * mark->connector->lock protects the list of marks anchored inside an
 * inode / vfsmount and each mark is hooked via the i_list.
 *
 * A list of notification marks relating to inode / mnt is contained in
 * fsnotify_mark_connector. That structure is alive as long as there are any
 * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets
 * detached from fsnotify_mark_connector when the last reference to the mark is
 * dropped. Thus having a mark reference is enough to protect the
 * mark->connector pointer and to make sure fsnotify_mark_connector cannot
 * disappear. Also because we remove the mark from g_list before dropping the
 * mark reference associated with it, any mark found through g_list is
 * guaranteed to have mark->connector set until we drop group->mark_mutex.
 *
 * LIFETIME:
 * Inode marks survive between when they are added to an inode and when their
 * refcnt==0. Marks are also protected by fsnotify_mark_srcu.
 *
 * The inode mark can be cleared for a number of different reasons including:
 * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
 * - The inode is being evicted from cache. (fsnotify_inode_delete)
 * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
 * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
 * - The fsnotify_group associated with the mark is going away and all such
 *   marks need to be cleaned up. (fsnotify_clear_marks_by_group)
 *
 * Any of these removal paths may run concurrently with any (or all) of the
 * others.
 */
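
/*
 * As a rough illustration (not a complete code path), the lock nesting used
 * while a mark is being added to an object looks like:
 *
 *	mutex_lock(&group->mark_mutex);
 *	spin_lock(&mark->lock);
 *	spin_lock(&mark->connector->lock);
 *	...
 *	spin_unlock(&mark->connector->lock);
 *	spin_unlock(&mark->lock);
 *	mutex_unlock(&group->mark_mutex);
 *
 * See fsnotify_add_mark_locked() and fsnotify_add_mark_list() below for the
 * real thing.
 */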

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>

#include <linux/atomic.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

#define FSNOTIFY_REAPER_DELAY	(1)	/* 1 jiffy */

struct srcu_struct fsnotify_mark_srcu;
struct kmem_cache *fsnotify_mark_connector_cachep;

static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
static struct fsnotify_mark_connector *connector_destroy_list;

static void fsnotify_mark_destroy_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn);

static void fsnotify_connector_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn);

void fsnotify_get_mark(struct fsnotify_mark *mark)
{
	WARN_ON_ONCE(!refcount_read(&mark->refcnt));
	refcount_inc(&mark->refcnt);
}
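
/*
 * Recalculate the event mask of the object (inode or vfsmount) the connector
 * is attached to, as the union of the masks of all marks still attached to
 * it. Must be called with conn->lock held.
 */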
static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
	u32 new_mask = 0;
	struct fsnotify_mark *mark;

	assert_spin_locked(&conn->lock);
	hlist_for_each_entry(mark, &conn->list, obj_list) {
		if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
			new_mask |= mark->mask;
	}
	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
		conn->inode->i_fsnotify_mask = new_mask;
	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
		real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask;
}

/*
 * Calculate mask of events for a list of marks. The caller must make sure
 * connector and connector->inode cannot disappear under us. Callers achieve
 * this by holding a mark->lock or mark->group->mark_mutex for a mark on this
 * list.
 */
void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
	if (!conn)
		return;

	spin_lock(&conn->lock);
	__fsnotify_recalc_mask(conn);
	spin_unlock(&conn->lock);
	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
		__fsnotify_update_child_dentry_flags(conn->inode);
}

/* Free all connectors queued for freeing once SRCU period ends */
static void fsnotify_connector_destroy_workfn(struct work_struct *work)
{
	struct fsnotify_mark_connector *conn, *free;

	spin_lock(&destroy_lock);
	conn = connector_destroy_list;
	connector_destroy_list = NULL;
	spin_unlock(&destroy_lock);

	synchronize_srcu(&fsnotify_mark_srcu);
	while (conn) {
		free = conn;
		conn = conn->destroy_next;
		kmem_cache_free(fsnotify_mark_connector_cachep, free);
	}
}

static struct inode *fsnotify_detach_connector_from_object(
					struct fsnotify_mark_connector *conn)
{
	struct inode *inode = NULL;

	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
		inode = conn->inode;
		rcu_assign_pointer(inode->i_fsnotify_marks, NULL);
		inode->i_fsnotify_mask = 0;
		conn->inode = NULL;
		conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
	} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
		rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks,
				   NULL);
		real_mount(conn->mnt)->mnt_fsnotify_mask = 0;
		conn->mnt = NULL;
		conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
	}

	return inode;
}

static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
{
	struct fsnotify_group *group = mark->group;

	if (WARN_ON_ONCE(!group))
		return;
	group->ops->free_mark(mark);
	fsnotify_put_group(group);
}
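
/*
 * Drop a reference to a mark. When the last reference is dropped, the mark is
 * removed from the object list (detaching the connector and queueing it for
 * freeing if the list became empty) and the mark itself is queued for
 * destruction once an SRCU grace period has passed. Marks that were never
 * attached to an object are destroyed right away.
 */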
void fsnotify_put_mark(struct fsnotify_mark *mark)
{
	struct fsnotify_mark_connector *conn;
	struct inode *inode = NULL;
	bool free_conn = false;

	/* Catch marks that were actually never attached to object */
	if (!mark->connector) {
		if (refcount_dec_and_test(&mark->refcnt))
			fsnotify_final_mark_destroy(mark);
		return;
	}

	/*
	 * We have to be careful so that traversals of obj_list under lock can
	 * safely grab mark reference.
	 */
	if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock))
		return;

	conn = mark->connector;
	hlist_del_init_rcu(&mark->obj_list);
	if (hlist_empty(&conn->list)) {
		inode = fsnotify_detach_connector_from_object(conn);
		free_conn = true;
	} else {
		__fsnotify_recalc_mask(conn);
	}
	mark->connector = NULL;
	spin_unlock(&conn->lock);

	iput(inode);

	if (free_conn) {
		spin_lock(&destroy_lock);
		conn->destroy_next = connector_destroy_list;
		connector_destroy_list = conn;
		spin_unlock(&destroy_lock);
		queue_work(system_unbound_wq, &connector_reaper_work);
	}
	/*
	 * Note that we didn't update flags telling whether inode cares about
	 * what's happening with children. We update these flags from
	 * __fsnotify_parent() lazily when next event happens on one of our
	 * children.
	 */
	spin_lock(&destroy_lock);
	list_add(&mark->g_list, &destroy_list);
	spin_unlock(&destroy_lock);
	queue_delayed_work(system_unbound_wq, &reaper_work,
			   FSNOTIFY_REAPER_DELAY);
}

/*
 * Get mark reference when we found the mark via lockless traversal of object
 * list. Mark can be already removed from the list by now and on its way to be
 * destroyed once SRCU period ends.
 *
 * Also pin the group so it doesn't disappear under us.
 */
static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
{
	if (!mark)
		return true;

	if (refcount_inc_not_zero(&mark->refcnt)) {
		spin_lock(&mark->lock);
		if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
			/* mark is attached, group is still alive then */
			atomic_inc(&mark->group->user_waits);
			spin_unlock(&mark->lock);
			return true;
		}
		spin_unlock(&mark->lock);
		fsnotify_put_mark(mark);
	}
	return false;
}

/*
 * Puts marks and wakes up group destruction if necessary.
 *
 * Pairs with fsnotify_get_mark_safe()
 */
static void fsnotify_put_mark_wake(struct fsnotify_mark *mark)
{
	if (mark) {
		struct fsnotify_group *group = mark->group;

		fsnotify_put_mark(mark);
		/*
		 * We abuse notification_waitq on group shutdown for waiting for
		 * all marks pinned when waiting for userspace.
		 */
		if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
			wake_up(&group->notification_waitq);
	}
}

bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
{
	int type;

	fsnotify_foreach_obj_type(type) {
		/* This can fail if mark is being removed */
		if (!fsnotify_get_mark_safe(iter_info->marks[type]))
			goto fail;
	}

	/*
	 * Now that both marks are pinned by refcount in the inode / vfsmount
	 * lists, we can drop SRCU lock, and safely resume the list iteration
	 * once userspace returns.
	 */
	srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);

	return true;

fail:
	for (type--; type >= 0; type--)
		fsnotify_put_mark_wake(iter_info->marks[type]);
	return false;
}

void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
{
	int type;

	iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
	fsnotify_foreach_obj_type(type)
		fsnotify_put_mark_wake(iter_info->marks[type]);
}
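
/*
 * Roughly, a backend that has to wait for a userspace response while handling
 * an event (e.g. fanotify permission events) uses the pair above like this
 * (an illustrative sketch only; details differ in the real callers):
 *
 *	if (fsnotify_prepare_user_wait(iter_info)) {
 *		... queue the event and sleep until userspace answers ...
 *		fsnotify_finish_user_wait(iter_info);
 *	}
 */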
/*
 * Mark the mark as detached and remove it from the group list. The mark still
 * stays in the object list until its last reference is dropped. Note that we
 * rely on the mark being removed from the group list before the corresponding
 * reference to it is dropped. In particular we rely on mark->connector being
 * valid while we hold group->mark_mutex if we found the mark through g_list.
 *
 * Must be called with group->mark_mutex held. The caller must either hold
 * reference to the mark or be protected by fsnotify_mark_srcu.
 */
void fsnotify_detach_mark(struct fsnotify_mark *mark)
{
	struct fsnotify_group *group = mark->group;

	WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
	WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
		     refcount_read(&mark->refcnt) < 1 +
			!!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));

	spin_lock(&mark->lock);
	/* something else already called this function on this mark */
	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		spin_unlock(&mark->lock);
		return;
	}
	mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
	list_del_init(&mark->g_list);
	spin_unlock(&mark->lock);

	atomic_dec(&group->num_marks);

	/* Drop mark reference acquired in fsnotify_add_mark_locked() */
	fsnotify_put_mark(mark);
}

/*
 * Free fsnotify mark. The mark is actually only marked as being freed. The
 * freeing happens from a workqueue, once the last reference to the mark is
 * dropped; the workqueue first waits for the SRCU grace period to end.
 *
 * Caller must have a reference to the mark or be protected by
 * fsnotify_mark_srcu.
 */
void fsnotify_free_mark(struct fsnotify_mark *mark)
{
	struct fsnotify_group *group = mark->group;

	spin_lock(&mark->lock);
	/* something else already called this function on this mark */
	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
		spin_unlock(&mark->lock);
		return;
	}
	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
	spin_unlock(&mark->lock);

	/*
	 * Some groups like to know that marks are being freed. This is a
	 * callback to the group function to let it know that this mark
	 * is being freed.
	 */
	if (group->ops->freeing_mark)
		group->ops->freeing_mark(mark, group);
}

void fsnotify_destroy_mark(struct fsnotify_mark *mark,
			   struct fsnotify_group *group)
{
	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
	fsnotify_detach_mark(mark);
	mutex_unlock(&group->mark_mutex);
	fsnotify_free_mark(mark);
}

/*
 * Sorting function for lists of fsnotify marks.
 *
 * Fanotify supports different notification classes (reflected as priority of
 * notification group). Events shall be passed to notification groups in
 * decreasing priority order. To achieve this marks in notification lists for
 * inodes and vfsmounts are sorted so that priorities of corresponding groups
 * are descending.
 *
 * Furthermore correct handling of the ignore mask requires processing inode
 * and vfsmount marks of each group together. Using the group address as
 * further sort criterion provides a unique sorting order and thus we can
 * merge inode and vfsmount lists of marks in linear time and find groups
 * present in both lists.
 *
 * A return value of 1 signifies that b has priority over a.
 * A return value of 0 signifies that the two marks have to be handled together.
 * A return value of -1 signifies that a has priority over b.
 */
int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
{
	if (a == b)
		return 0;
	if (!a)
		return 1;
	if (!b)
		return -1;
	if (a->priority < b->priority)
		return 1;
	if (a->priority > b->priority)
		return -1;
	if (a < b)
		return 1;
	return -1;
}
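
/*
 * Attach a freshly allocated connector to the object (inode or vfsmount) if
 * it does not have one yet. Returns 0 once a connector is attached (either
 * ours, or one installed concurrently by somebody else, in which case our
 * allocation is freed), so the caller simply retries the lookup; returns
 * -ENOMEM if the allocation failed.
 */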
static int fsnotify_attach_connector_to_object(
				struct fsnotify_mark_connector __rcu **connp,
				struct inode *inode,
				struct vfsmount *mnt)
{
	struct fsnotify_mark_connector *conn;

	conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
	if (!conn)
		return -ENOMEM;
	spin_lock_init(&conn->lock);
	INIT_HLIST_HEAD(&conn->list);
	if (inode) {
		conn->type = FSNOTIFY_OBJ_TYPE_INODE;
		conn->inode = igrab(inode);
	} else {
		conn->type = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
		conn->mnt = mnt;
	}
	/*
	 * cmpxchg() provides the barrier so that readers of *connp can see
	 * only initialized structure
	 */
	if (cmpxchg(connp, NULL, conn)) {
		/* Someone else created list structure for us */
		if (inode)
			iput(inode);
		kmem_cache_free(fsnotify_mark_connector_cachep, conn);
	}

	return 0;
}

/*
 * Get mark connector, make sure it is alive and return with its lock held.
 * This is for users that get connector pointer from inode or mount. Users that
 * hold reference to a mark on the list may directly lock connector->lock as
 * they are sure list cannot go away under them.
 */
static struct fsnotify_mark_connector *fsnotify_grab_connector(
				struct fsnotify_mark_connector __rcu **connp)
{
	struct fsnotify_mark_connector *conn;
	int idx;

	idx = srcu_read_lock(&fsnotify_mark_srcu);
	conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
	if (!conn)
		goto out;
	spin_lock(&conn->lock);
	if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) {
		spin_unlock(&conn->lock);
		srcu_read_unlock(&fsnotify_mark_srcu, idx);
		return NULL;
	}
out:
	srcu_read_unlock(&fsnotify_mark_srcu, idx);
	return conn;
}

/*
 * Add mark into proper place in given list of marks. These marks may be used
 * for the fsnotify backend to determine which event types should be delivered
 * to which group and for which inodes. These marks are ordered according to
 * priority, highest number first, and then by the group's location in memory.
 */
static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
				  struct inode *inode, struct vfsmount *mnt,
				  int allow_dups)
{
	struct fsnotify_mark *lmark, *last = NULL;
	struct fsnotify_mark_connector *conn;
	struct fsnotify_mark_connector __rcu **connp;
	int cmp;
	int err = 0;

	if (WARN_ON(!inode && !mnt))
		return -EINVAL;
	if (inode)
		connp = &inode->i_fsnotify_marks;
	else
		connp = &real_mount(mnt)->mnt_fsnotify_marks;
restart:
	spin_lock(&mark->lock);
	conn = fsnotify_grab_connector(connp);
	if (!conn) {
		spin_unlock(&mark->lock);
		err = fsnotify_attach_connector_to_object(connp, inode, mnt);
		if (err)
			return err;
		goto restart;
	}

	/* is mark the first mark? */
	if (hlist_empty(&conn->list)) {
		hlist_add_head_rcu(&mark->obj_list, &conn->list);
		goto added;
	}

	/* should mark be in the middle of the current list? */
	hlist_for_each_entry(lmark, &conn->list, obj_list) {
		last = lmark;

		if ((lmark->group == mark->group) &&
		    (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
		    !allow_dups) {
			err = -EEXIST;
			goto out_err;
		}

		cmp = fsnotify_compare_groups(lmark->group, mark->group);
		if (cmp >= 0) {
			hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list);
			goto added;
		}
	}

	BUG_ON(last == NULL);
	/* mark should be the last entry.  last is the current last entry */
	hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
	mark->connector = conn;
out_err:
	spin_unlock(&conn->lock);
	spin_unlock(&mark->lock);
	return err;
}
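
/*
 * Roughly, a backend creates and tears down a mark like this (an illustrative
 * sketch; real backends add their own bookkeeping around it and often drop
 * their reference earlier):
 *
 *	fsnotify_init_mark(mark, group);
 *	mark->mask = FS_MODIFY;
 *	ret = fsnotify_add_mark(mark, inode, NULL, 0);
 *	...
 *	fsnotify_destroy_mark(mark, group);
 *	fsnotify_put_mark(mark);
 *
 * The final fsnotify_put_mark() drops the reference taken by
 * fsnotify_init_mark().
 */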
/*
 * Attach an initialized mark to a given group and fs object.
 * These marks may be used for the fsnotify backend to determine which
 * event types should be delivered to which group.
 */
int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode,
			     struct vfsmount *mnt, int allow_dups)
{
	struct fsnotify_group *group = mark->group;
	int ret = 0;

	BUG_ON(inode && mnt);
	BUG_ON(!inode && !mnt);
	BUG_ON(!mutex_is_locked(&group->mark_mutex));

	/*
	 * LOCKING ORDER!!!!
	 * group->mark_mutex
	 * mark->lock
	 * mark->connector->lock
	 */
	spin_lock(&mark->lock);
	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;

	list_add(&mark->g_list, &group->marks_list);
	atomic_inc(&group->num_marks);
	fsnotify_get_mark(mark); /* for g_list */
	spin_unlock(&mark->lock);

	ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups);
	if (ret)
		goto err;

	if (mark->mask)
		fsnotify_recalc_mask(mark->connector);

	return ret;
err:
	spin_lock(&mark->lock);
	mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
			 FSNOTIFY_MARK_FLAG_ATTACHED);
	list_del_init(&mark->g_list);
	spin_unlock(&mark->lock);
	atomic_dec(&group->num_marks);

	fsnotify_put_mark(mark);
	return ret;
}

int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
		      struct vfsmount *mnt, int allow_dups)
{
	int ret;
	struct fsnotify_group *group = mark->group;

	mutex_lock(&group->mark_mutex);
	ret = fsnotify_add_mark_locked(mark, inode, mnt, allow_dups);
	mutex_unlock(&group->mark_mutex);
	return ret;
}

/*
 * Given a list of marks, find the mark associated with given group. If found
 * take a reference to that mark and return it, else return NULL.
 */
struct fsnotify_mark *fsnotify_find_mark(
				struct fsnotify_mark_connector __rcu **connp,
				struct fsnotify_group *group)
{
	struct fsnotify_mark_connector *conn;
	struct fsnotify_mark *mark;

	conn = fsnotify_grab_connector(connp);
	if (!conn)
		return NULL;

	hlist_for_each_entry(mark, &conn->list, obj_list) {
		if (mark->group == group &&
		    (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
			fsnotify_get_mark(mark);
			spin_unlock(&conn->lock);
			return mark;
		}
	}
	spin_unlock(&conn->lock);
	return NULL;
}
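
/*
 * For illustration, a backend that wants to update an existing watch would
 * typically combine the helpers above roughly as follows (sketch only, not
 * taken verbatim from any backend):
 *
 *	mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
 *	if (mark) {
 *		spin_lock(&mark->lock);
 *		mark->mask |= FS_CREATE;
 *		spin_unlock(&mark->lock);
 *		fsnotify_recalc_mask(mark->connector);
 *		fsnotify_put_mark(mark);
 *	}
 *
 * The final fsnotify_put_mark() drops the reference taken by
 * fsnotify_find_mark().
 */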
/* Clear any marks in a group with given type mask */
void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
				   unsigned int type_mask)
{
	struct fsnotify_mark *lmark, *mark;
	LIST_HEAD(to_free);
	struct list_head *head = &to_free;

	/* Skip selection step if we want to clear all marks. */
	if (type_mask == FSNOTIFY_OBJ_ALL_TYPES_MASK) {
		head = &group->marks_list;
		goto clear;
	}
	/*
	 * We have to be really careful here. Anytime we drop mark_mutex, e.g.
	 * fsnotify_clear_marks_by_inode() can come and free marks. Even in our
	 * to_free list so we have to use mark_mutex even when accessing that
	 * list. And freeing mark requires us to drop mark_mutex. So we can
	 * reliably free only the first mark in the list. That's why we first
	 * move marks to free to to_free list in one go and then free marks in
	 * to_free list one by one.
	 */
	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
		if ((1U << mark->connector->type) & type_mask)
			list_move(&mark->g_list, &to_free);
	}
	mutex_unlock(&group->mark_mutex);

clear:
	while (1) {
		mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
		if (list_empty(head)) {
			mutex_unlock(&group->mark_mutex);
			break;
		}
		mark = list_first_entry(head, struct fsnotify_mark, g_list);
		fsnotify_get_mark(mark);
		fsnotify_detach_mark(mark);
		mutex_unlock(&group->mark_mutex);
		fsnotify_free_mark(mark);
		fsnotify_put_mark(mark);
	}
}

/* Destroy all marks attached to inode / vfsmount */
void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp)
{
	struct fsnotify_mark_connector *conn;
	struct fsnotify_mark *mark, *old_mark = NULL;
	struct inode *inode;

	conn = fsnotify_grab_connector(connp);
	if (!conn)
		return;
	/*
	 * We have to be careful since we can race with e.g.
	 * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the
	 * list can get modified. However we are holding mark reference and
	 * thus our mark cannot be removed from obj_list so we can continue
	 * iteration after regaining conn->lock.
	 */
	hlist_for_each_entry(mark, &conn->list, obj_list) {
		fsnotify_get_mark(mark);
		spin_unlock(&conn->lock);
		if (old_mark)
			fsnotify_put_mark(old_mark);
		old_mark = mark;
		fsnotify_destroy_mark(mark, mark->group);
		spin_lock(&conn->lock);
	}
	/*
	 * Detach list from object now so that we don't pin inode until all
	 * mark references get dropped. It would lead to strange results such
	 * as delaying inode deletion or blocking unmount.
	 */
	inode = fsnotify_detach_connector_from_object(conn);
	spin_unlock(&conn->lock);
	if (old_mark)
		fsnotify_put_mark(old_mark);
	iput(inode);
}

/*
 * Nothing fancy, just initialize lists and locks and counters.
 */
void fsnotify_init_mark(struct fsnotify_mark *mark,
			struct fsnotify_group *group)
{
	memset(mark, 0, sizeof(*mark));
	spin_lock_init(&mark->lock);
	refcount_set(&mark->refcnt, 1);
	fsnotify_get_group(group);
	mark->group = group;
}

/*
 * Destroy all marks in destroy_list, waits for SRCU period to finish before
 * actually freeing marks.
 */
static void fsnotify_mark_destroy_workfn(struct work_struct *work)
{
	struct fsnotify_mark *mark, *next;
	struct list_head private_destroy_list;

	spin_lock(&destroy_lock);
	/* exchange the list head */
	list_replace_init(&destroy_list, &private_destroy_list);
	spin_unlock(&destroy_lock);

	synchronize_srcu(&fsnotify_mark_srcu);

	list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
		list_del_init(&mark->g_list);
		fsnotify_final_mark_destroy(mark);
	}
}

/* Wait for all marks queued for destruction to be actually destroyed */
void fsnotify_wait_marks_destroyed(void)
{
	flush_delayed_work(&reaper_work);
}