1 /* 2 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2, or (at your option) 7 * any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; see the file COPYING. If not, write to 16 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 17 */ 18 19 /* 20 * fsnotify inode mark locking/lifetime/and refcnting 21 * 22 * REFCNT: 23 * The group->recnt and mark->refcnt tell how many "things" in the kernel 24 * currently are referencing the objects. Both kind of objects typically will 25 * live inside the kernel with a refcnt of 2, one for its creation and one for 26 * the reference a group and a mark hold to each other. 27 * If you are holding the appropriate locks, you can take a reference and the 28 * object itself is guaranteed to survive until the reference is dropped. 29 * 30 * LOCKING: 31 * There are 3 locks involved with fsnotify inode marks and they MUST be taken 32 * in order as follows: 33 * 34 * group->mark_mutex 35 * mark->lock 36 * mark->connector->lock 37 * 38 * group->mark_mutex protects the marks_list anchored inside a given group and 39 * each mark is hooked via the g_list. It also protects the groups private 40 * data (i.e group limits). 41 42 * mark->lock protects the marks attributes like its masks and flags. 43 * Furthermore it protects the access to a reference of the group that the mark 44 * is assigned to as well as the access to a reference of the inode/vfsmount 45 * that is being watched by the mark. 46 * 47 * mark->connector->lock protects the list of marks anchored inside an 48 * inode / vfsmount and each mark is hooked via the i_list. 49 * 50 * A list of notification marks relating to inode / mnt is contained in 51 * fsnotify_mark_connector. That structure is alive as long as there are any 52 * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets 53 * detached from fsnotify_mark_connector when last reference to the mark is 54 * dropped. Thus having mark reference is enough to protect mark->connector 55 * pointer and to make sure fsnotify_mark_connector cannot disappear. Also 56 * because we remove mark from g_list before dropping mark reference associated 57 * with that, any mark found through g_list is guaranteed to have 58 * mark->connector set until we drop group->mark_mutex. 59 * 60 * LIFETIME: 61 * Inode marks survive between when they are added to an inode and when their 62 * refcnt==0. Marks are also protected by fsnotify_mark_srcu. 63 * 64 * The inode mark can be cleared for a number of different reasons including: 65 * - The inode is unlinked for the last time. (fsnotify_inode_remove) 66 * - The inode is being evicted from cache. (fsnotify_inode_delete) 67 * - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes) 68 * - Something explicitly requests that it be removed. (fsnotify_destroy_mark) 69 * - The fsnotify_group associated with the mark is going away and all such marks 70 * need to be cleaned up. (fsnotify_clear_marks_by_group) 71 * 72 * This has the very interesting property of being able to run concurrently with 73 * any (or all) other directions. 74 */ 75 76 #include <linux/fs.h> 77 #include <linux/init.h> 78 #include <linux/kernel.h> 79 #include <linux/kthread.h> 80 #include <linux/module.h> 81 #include <linux/mutex.h> 82 #include <linux/slab.h> 83 #include <linux/spinlock.h> 84 #include <linux/srcu.h> 85 86 #include <linux/atomic.h> 87 88 #include <linux/fsnotify_backend.h> 89 #include "fsnotify.h" 90 91 #define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */ 92 93 struct srcu_struct fsnotify_mark_srcu; 94 struct kmem_cache *fsnotify_mark_connector_cachep; 95 96 static DEFINE_SPINLOCK(destroy_lock); 97 static LIST_HEAD(destroy_list); 98 static struct fsnotify_mark_connector *connector_destroy_list; 99 100 static void fsnotify_mark_destroy_workfn(struct work_struct *work); 101 static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn); 102 103 static void fsnotify_connector_destroy_workfn(struct work_struct *work); 104 static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn); 105 106 void fsnotify_get_mark(struct fsnotify_mark *mark) 107 { 108 WARN_ON_ONCE(!refcount_read(&mark->refcnt)); 109 refcount_inc(&mark->refcnt); 110 } 111 112 static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) 113 { 114 if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) 115 return &fsnotify_conn_inode(conn)->i_fsnotify_mask; 116 else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) 117 return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask; 118 return NULL; 119 } 120 121 __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn) 122 { 123 if (WARN_ON(!fsnotify_valid_obj_type(conn->type))) 124 return 0; 125 126 return *fsnotify_conn_mask_p(conn); 127 } 128 129 static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) 130 { 131 u32 new_mask = 0; 132 struct fsnotify_mark *mark; 133 134 assert_spin_locked(&conn->lock); 135 hlist_for_each_entry(mark, &conn->list, obj_list) { 136 if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) 137 new_mask |= mark->mask; 138 } 139 if (WARN_ON(!fsnotify_valid_obj_type(conn->type))) 140 return; 141 142 *fsnotify_conn_mask_p(conn) = new_mask; 143 } 144 145 /* 146 * Calculate mask of events for a list of marks. The caller must make sure 147 * connector and connector->obj cannot disappear under us. Callers achieve 148 * this by holding a mark->lock or mark->group->mark_mutex for a mark on this 149 * list. 150 */ 151 void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) 152 { 153 if (!conn) 154 return; 155 156 spin_lock(&conn->lock); 157 __fsnotify_recalc_mask(conn); 158 spin_unlock(&conn->lock); 159 if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) 160 __fsnotify_update_child_dentry_flags( 161 fsnotify_conn_inode(conn)); 162 } 163 164 /* Free all connectors queued for freeing once SRCU period ends */ 165 static void fsnotify_connector_destroy_workfn(struct work_struct *work) 166 { 167 struct fsnotify_mark_connector *conn, *free; 168 169 spin_lock(&destroy_lock); 170 conn = connector_destroy_list; 171 connector_destroy_list = NULL; 172 spin_unlock(&destroy_lock); 173 174 synchronize_srcu(&fsnotify_mark_srcu); 175 while (conn) { 176 free = conn; 177 conn = conn->destroy_next; 178 kmem_cache_free(fsnotify_mark_connector_cachep, free); 179 } 180 } 181 182 static struct inode *fsnotify_detach_connector_from_object( 183 struct fsnotify_mark_connector *conn) 184 { 185 struct inode *inode = NULL; 186 187 if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) 188 return NULL; 189 190 if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { 191 inode = fsnotify_conn_inode(conn); 192 inode->i_fsnotify_mask = 0; 193 } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { 194 fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; 195 } 196 197 rcu_assign_pointer(*(conn->obj), NULL); 198 conn->obj = NULL; 199 conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; 200 201 return inode; 202 } 203 204 static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) 205 { 206 struct fsnotify_group *group = mark->group; 207 208 if (WARN_ON_ONCE(!group)) 209 return; 210 group->ops->free_mark(mark); 211 fsnotify_put_group(group); 212 } 213 214 void fsnotify_put_mark(struct fsnotify_mark *mark) 215 { 216 struct fsnotify_mark_connector *conn; 217 struct inode *inode = NULL; 218 bool free_conn = false; 219 220 /* Catch marks that were actually never attached to object */ 221 if (!mark->connector) { 222 if (refcount_dec_and_test(&mark->refcnt)) 223 fsnotify_final_mark_destroy(mark); 224 return; 225 } 226 227 /* 228 * We have to be careful so that traversals of obj_list under lock can 229 * safely grab mark reference. 230 */ 231 if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock)) 232 return; 233 234 conn = mark->connector; 235 hlist_del_init_rcu(&mark->obj_list); 236 if (hlist_empty(&conn->list)) { 237 inode = fsnotify_detach_connector_from_object(conn); 238 free_conn = true; 239 } else { 240 __fsnotify_recalc_mask(conn); 241 } 242 mark->connector = NULL; 243 spin_unlock(&conn->lock); 244 245 iput(inode); 246 247 if (free_conn) { 248 spin_lock(&destroy_lock); 249 conn->destroy_next = connector_destroy_list; 250 connector_destroy_list = conn; 251 spin_unlock(&destroy_lock); 252 queue_work(system_unbound_wq, &connector_reaper_work); 253 } 254 /* 255 * Note that we didn't update flags telling whether inode cares about 256 * what's happening with children. We update these flags from 257 * __fsnotify_parent() lazily when next event happens on one of our 258 * children. 259 */ 260 spin_lock(&destroy_lock); 261 list_add(&mark->g_list, &destroy_list); 262 spin_unlock(&destroy_lock); 263 queue_delayed_work(system_unbound_wq, &reaper_work, 264 FSNOTIFY_REAPER_DELAY); 265 } 266 267 /* 268 * Get mark reference when we found the mark via lockless traversal of object 269 * list. Mark can be already removed from the list by now and on its way to be 270 * destroyed once SRCU period ends. 271 * 272 * Also pin the group so it doesn't disappear under us. 273 */ 274 static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) 275 { 276 if (!mark) 277 return true; 278 279 if (refcount_inc_not_zero(&mark->refcnt)) { 280 spin_lock(&mark->lock); 281 if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) { 282 /* mark is attached, group is still alive then */ 283 atomic_inc(&mark->group->user_waits); 284 spin_unlock(&mark->lock); 285 return true; 286 } 287 spin_unlock(&mark->lock); 288 fsnotify_put_mark(mark); 289 } 290 return false; 291 } 292 293 /* 294 * Puts marks and wakes up group destruction if necessary. 295 * 296 * Pairs with fsnotify_get_mark_safe() 297 */ 298 static void fsnotify_put_mark_wake(struct fsnotify_mark *mark) 299 { 300 if (mark) { 301 struct fsnotify_group *group = mark->group; 302 303 fsnotify_put_mark(mark); 304 /* 305 * We abuse notification_waitq on group shutdown for waiting for 306 * all marks pinned when waiting for userspace. 307 */ 308 if (atomic_dec_and_test(&group->user_waits) && group->shutdown) 309 wake_up(&group->notification_waitq); 310 } 311 } 312 313 bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) 314 { 315 int type; 316 317 fsnotify_foreach_obj_type(type) { 318 /* This can fail if mark is being removed */ 319 if (!fsnotify_get_mark_safe(iter_info->marks[type])) 320 goto fail; 321 } 322 323 /* 324 * Now that both marks are pinned by refcount in the inode / vfsmount 325 * lists, we can drop SRCU lock, and safely resume the list iteration 326 * once userspace returns. 327 */ 328 srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); 329 330 return true; 331 332 fail: 333 for (type--; type >= 0; type--) 334 fsnotify_put_mark_wake(iter_info->marks[type]); 335 return false; 336 } 337 338 void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) 339 { 340 int type; 341 342 iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); 343 fsnotify_foreach_obj_type(type) 344 fsnotify_put_mark_wake(iter_info->marks[type]); 345 } 346 347 /* 348 * Mark mark as detached, remove it from group list. Mark still stays in object 349 * list until its last reference is dropped. Note that we rely on mark being 350 * removed from group list before corresponding reference to it is dropped. In 351 * particular we rely on mark->connector being valid while we hold 352 * group->mark_mutex if we found the mark through g_list. 353 * 354 * Must be called with group->mark_mutex held. The caller must either hold 355 * reference to the mark or be protected by fsnotify_mark_srcu. 356 */ 357 void fsnotify_detach_mark(struct fsnotify_mark *mark) 358 { 359 struct fsnotify_group *group = mark->group; 360 361 WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); 362 WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && 363 refcount_read(&mark->refcnt) < 1 + 364 !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); 365 366 spin_lock(&mark->lock); 367 /* something else already called this function on this mark */ 368 if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { 369 spin_unlock(&mark->lock); 370 return; 371 } 372 mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; 373 list_del_init(&mark->g_list); 374 spin_unlock(&mark->lock); 375 376 atomic_dec(&group->num_marks); 377 378 /* Drop mark reference acquired in fsnotify_add_mark_locked() */ 379 fsnotify_put_mark(mark); 380 } 381 382 /* 383 * Free fsnotify mark. The mark is actually only marked as being freed. The 384 * freeing is actually happening only once last reference to the mark is 385 * dropped from a workqueue which first waits for srcu period end. 386 * 387 * Caller must have a reference to the mark or be protected by 388 * fsnotify_mark_srcu. 389 */ 390 void fsnotify_free_mark(struct fsnotify_mark *mark) 391 { 392 struct fsnotify_group *group = mark->group; 393 394 spin_lock(&mark->lock); 395 /* something else already called this function on this mark */ 396 if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { 397 spin_unlock(&mark->lock); 398 return; 399 } 400 mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; 401 spin_unlock(&mark->lock); 402 403 /* 404 * Some groups like to know that marks are being freed. This is a 405 * callback to the group function to let it know that this mark 406 * is being freed. 407 */ 408 if (group->ops->freeing_mark) 409 group->ops->freeing_mark(mark, group); 410 } 411 412 void fsnotify_destroy_mark(struct fsnotify_mark *mark, 413 struct fsnotify_group *group) 414 { 415 mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); 416 fsnotify_detach_mark(mark); 417 mutex_unlock(&group->mark_mutex); 418 fsnotify_free_mark(mark); 419 } 420 421 /* 422 * Sorting function for lists of fsnotify marks. 423 * 424 * Fanotify supports different notification classes (reflected as priority of 425 * notification group). Events shall be passed to notification groups in 426 * decreasing priority order. To achieve this marks in notification lists for 427 * inodes and vfsmounts are sorted so that priorities of corresponding groups 428 * are descending. 429 * 430 * Furthermore correct handling of the ignore mask requires processing inode 431 * and vfsmount marks of each group together. Using the group address as 432 * further sort criterion provides a unique sorting order and thus we can 433 * merge inode and vfsmount lists of marks in linear time and find groups 434 * present in both lists. 435 * 436 * A return value of 1 signifies that b has priority over a. 437 * A return value of 0 signifies that the two marks have to be handled together. 438 * A return value of -1 signifies that a has priority over b. 439 */ 440 int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) 441 { 442 if (a == b) 443 return 0; 444 if (!a) 445 return 1; 446 if (!b) 447 return -1; 448 if (a->priority < b->priority) 449 return 1; 450 if (a->priority > b->priority) 451 return -1; 452 if (a < b) 453 return 1; 454 return -1; 455 } 456 457 static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp, 458 unsigned int type) 459 { 460 struct inode *inode = NULL; 461 struct fsnotify_mark_connector *conn; 462 463 conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); 464 if (!conn) 465 return -ENOMEM; 466 spin_lock_init(&conn->lock); 467 INIT_HLIST_HEAD(&conn->list); 468 conn->type = type; 469 conn->obj = connp; 470 if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) 471 inode = igrab(fsnotify_conn_inode(conn)); 472 /* 473 * cmpxchg() provides the barrier so that readers of *connp can see 474 * only initialized structure 475 */ 476 if (cmpxchg(connp, NULL, conn)) { 477 /* Someone else created list structure for us */ 478 if (inode) 479 iput(inode); 480 kmem_cache_free(fsnotify_mark_connector_cachep, conn); 481 } 482 483 return 0; 484 } 485 486 /* 487 * Get mark connector, make sure it is alive and return with its lock held. 488 * This is for users that get connector pointer from inode or mount. Users that 489 * hold reference to a mark on the list may directly lock connector->lock as 490 * they are sure list cannot go away under them. 491 */ 492 static struct fsnotify_mark_connector *fsnotify_grab_connector( 493 fsnotify_connp_t *connp) 494 { 495 struct fsnotify_mark_connector *conn; 496 int idx; 497 498 idx = srcu_read_lock(&fsnotify_mark_srcu); 499 conn = srcu_dereference(*connp, &fsnotify_mark_srcu); 500 if (!conn) 501 goto out; 502 spin_lock(&conn->lock); 503 if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) { 504 spin_unlock(&conn->lock); 505 srcu_read_unlock(&fsnotify_mark_srcu, idx); 506 return NULL; 507 } 508 out: 509 srcu_read_unlock(&fsnotify_mark_srcu, idx); 510 return conn; 511 } 512 513 /* 514 * Add mark into proper place in given list of marks. These marks may be used 515 * for the fsnotify backend to determine which event types should be delivered 516 * to which group and for which inodes. These marks are ordered according to 517 * priority, highest number first, and then by the group's location in memory. 518 */ 519 static int fsnotify_add_mark_list(struct fsnotify_mark *mark, 520 fsnotify_connp_t *connp, unsigned int type, 521 int allow_dups) 522 { 523 struct fsnotify_mark *lmark, *last = NULL; 524 struct fsnotify_mark_connector *conn; 525 int cmp; 526 int err = 0; 527 528 if (WARN_ON(!fsnotify_valid_obj_type(type))) 529 return -EINVAL; 530 restart: 531 spin_lock(&mark->lock); 532 conn = fsnotify_grab_connector(connp); 533 if (!conn) { 534 spin_unlock(&mark->lock); 535 err = fsnotify_attach_connector_to_object(connp, type); 536 if (err) 537 return err; 538 goto restart; 539 } 540 541 /* is mark the first mark? */ 542 if (hlist_empty(&conn->list)) { 543 hlist_add_head_rcu(&mark->obj_list, &conn->list); 544 goto added; 545 } 546 547 /* should mark be in the middle of the current list? */ 548 hlist_for_each_entry(lmark, &conn->list, obj_list) { 549 last = lmark; 550 551 if ((lmark->group == mark->group) && 552 (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && 553 !allow_dups) { 554 err = -EEXIST; 555 goto out_err; 556 } 557 558 cmp = fsnotify_compare_groups(lmark->group, mark->group); 559 if (cmp >= 0) { 560 hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list); 561 goto added; 562 } 563 } 564 565 BUG_ON(last == NULL); 566 /* mark should be the last entry. last is the current last entry */ 567 hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); 568 added: 569 mark->connector = conn; 570 out_err: 571 spin_unlock(&conn->lock); 572 spin_unlock(&mark->lock); 573 return err; 574 } 575 576 /* 577 * Attach an initialized mark to a given group and fs object. 578 * These marks may be used for the fsnotify backend to determine which 579 * event types should be delivered to which group. 580 */ 581 int fsnotify_add_mark_locked(struct fsnotify_mark *mark, 582 fsnotify_connp_t *connp, unsigned int type, 583 int allow_dups) 584 { 585 struct fsnotify_group *group = mark->group; 586 int ret = 0; 587 588 BUG_ON(!mutex_is_locked(&group->mark_mutex)); 589 590 /* 591 * LOCKING ORDER!!!! 592 * group->mark_mutex 593 * mark->lock 594 * mark->connector->lock 595 */ 596 spin_lock(&mark->lock); 597 mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED; 598 599 list_add(&mark->g_list, &group->marks_list); 600 atomic_inc(&group->num_marks); 601 fsnotify_get_mark(mark); /* for g_list */ 602 spin_unlock(&mark->lock); 603 604 ret = fsnotify_add_mark_list(mark, connp, type, allow_dups); 605 if (ret) 606 goto err; 607 608 if (mark->mask) 609 fsnotify_recalc_mask(mark->connector); 610 611 return ret; 612 err: 613 spin_lock(&mark->lock); 614 mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE | 615 FSNOTIFY_MARK_FLAG_ATTACHED); 616 list_del_init(&mark->g_list); 617 spin_unlock(&mark->lock); 618 atomic_dec(&group->num_marks); 619 620 fsnotify_put_mark(mark); 621 return ret; 622 } 623 624 int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp, 625 unsigned int type, int allow_dups) 626 { 627 int ret; 628 struct fsnotify_group *group = mark->group; 629 630 mutex_lock(&group->mark_mutex); 631 ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups); 632 mutex_unlock(&group->mark_mutex); 633 return ret; 634 } 635 636 /* 637 * Given a list of marks, find the mark associated with given group. If found 638 * take a reference to that mark and return it, else return NULL. 639 */ 640 struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, 641 struct fsnotify_group *group) 642 { 643 struct fsnotify_mark_connector *conn; 644 struct fsnotify_mark *mark; 645 646 conn = fsnotify_grab_connector(connp); 647 if (!conn) 648 return NULL; 649 650 hlist_for_each_entry(mark, &conn->list, obj_list) { 651 if (mark->group == group && 652 (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { 653 fsnotify_get_mark(mark); 654 spin_unlock(&conn->lock); 655 return mark; 656 } 657 } 658 spin_unlock(&conn->lock); 659 return NULL; 660 } 661 662 /* Clear any marks in a group with given type mask */ 663 void fsnotify_clear_marks_by_group(struct fsnotify_group *group, 664 unsigned int type_mask) 665 { 666 struct fsnotify_mark *lmark, *mark; 667 LIST_HEAD(to_free); 668 struct list_head *head = &to_free; 669 670 /* Skip selection step if we want to clear all marks. */ 671 if (type_mask == FSNOTIFY_OBJ_ALL_TYPES_MASK) { 672 head = &group->marks_list; 673 goto clear; 674 } 675 /* 676 * We have to be really careful here. Anytime we drop mark_mutex, e.g. 677 * fsnotify_clear_marks_by_inode() can come and free marks. Even in our 678 * to_free list so we have to use mark_mutex even when accessing that 679 * list. And freeing mark requires us to drop mark_mutex. So we can 680 * reliably free only the first mark in the list. That's why we first 681 * move marks to free to to_free list in one go and then free marks in 682 * to_free list one by one. 683 */ 684 mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); 685 list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { 686 if ((1U << mark->connector->type) & type_mask) 687 list_move(&mark->g_list, &to_free); 688 } 689 mutex_unlock(&group->mark_mutex); 690 691 clear: 692 while (1) { 693 mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); 694 if (list_empty(head)) { 695 mutex_unlock(&group->mark_mutex); 696 break; 697 } 698 mark = list_first_entry(head, struct fsnotify_mark, g_list); 699 fsnotify_get_mark(mark); 700 fsnotify_detach_mark(mark); 701 mutex_unlock(&group->mark_mutex); 702 fsnotify_free_mark(mark); 703 fsnotify_put_mark(mark); 704 } 705 } 706 707 /* Destroy all marks attached to an object via connector */ 708 void fsnotify_destroy_marks(fsnotify_connp_t *connp) 709 { 710 struct fsnotify_mark_connector *conn; 711 struct fsnotify_mark *mark, *old_mark = NULL; 712 struct inode *inode; 713 714 conn = fsnotify_grab_connector(connp); 715 if (!conn) 716 return; 717 /* 718 * We have to be careful since we can race with e.g. 719 * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the 720 * list can get modified. However we are holding mark reference and 721 * thus our mark cannot be removed from obj_list so we can continue 722 * iteration after regaining conn->lock. 723 */ 724 hlist_for_each_entry(mark, &conn->list, obj_list) { 725 fsnotify_get_mark(mark); 726 spin_unlock(&conn->lock); 727 if (old_mark) 728 fsnotify_put_mark(old_mark); 729 old_mark = mark; 730 fsnotify_destroy_mark(mark, mark->group); 731 spin_lock(&conn->lock); 732 } 733 /* 734 * Detach list from object now so that we don't pin inode until all 735 * mark references get dropped. It would lead to strange results such 736 * as delaying inode deletion or blocking unmount. 737 */ 738 inode = fsnotify_detach_connector_from_object(conn); 739 spin_unlock(&conn->lock); 740 if (old_mark) 741 fsnotify_put_mark(old_mark); 742 iput(inode); 743 } 744 745 /* 746 * Nothing fancy, just initialize lists and locks and counters. 747 */ 748 void fsnotify_init_mark(struct fsnotify_mark *mark, 749 struct fsnotify_group *group) 750 { 751 memset(mark, 0, sizeof(*mark)); 752 spin_lock_init(&mark->lock); 753 refcount_set(&mark->refcnt, 1); 754 fsnotify_get_group(group); 755 mark->group = group; 756 } 757 758 /* 759 * Destroy all marks in destroy_list, waits for SRCU period to finish before 760 * actually freeing marks. 761 */ 762 static void fsnotify_mark_destroy_workfn(struct work_struct *work) 763 { 764 struct fsnotify_mark *mark, *next; 765 struct list_head private_destroy_list; 766 767 spin_lock(&destroy_lock); 768 /* exchange the list head */ 769 list_replace_init(&destroy_list, &private_destroy_list); 770 spin_unlock(&destroy_lock); 771 772 synchronize_srcu(&fsnotify_mark_srcu); 773 774 list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { 775 list_del_init(&mark->g_list); 776 fsnotify_final_mark_destroy(mark); 777 } 778 } 779 780 /* Wait for all marks queued for destruction to be actually destroyed */ 781 void fsnotify_wait_marks_destroyed(void) 782 { 783 flush_delayed_work(&reaper_work); 784 } 785