// SPDX-License-Identifier: GPL-2.0
/*
 * The NFSD open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 *
 * An nfsd_file object is a per-file collection of open state that binds
 * together:
 *   - a struct file *
 *   - a user credential
 *   - a network namespace
 *   - a read-ahead context
 *   - monitoring for writeback errors
 *
 * nfsd_file objects are reference-counted. Consumers acquire a new
 * object via the nfsd_file_acquire API. They manage their interest in
 * the acquired object, and hence the object's reference count, via
 * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file
 * object:
 *
 *  * non-garbage-collected: When a consumer wants to precisely control
 *    the lifetime of a file's open state, it acquires a non-garbage-
 *    collected nfsd_file. The final nfsd_file_put releases the open
 *    state immediately.
 *
 *  * garbage-collected: When a consumer does not control the lifetime
 *    of open state, it acquires a garbage-collected nfsd_file. The
 *    final nfsd_file_put allows the open state to linger for a period
 *    during which it may be re-used.
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>
#include <linux/rhashtable.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

/* Delay before the laundrette (GC) work runs after being scheduled */
#define NFSD_LAUNDRETTE_DELAY		     (2 * HZ)

/* Bit number in nfsd_file_flags: set while the cache is initialised */
#define NFSD_FILE_CACHE_UP		     (0)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

/* Per-CPU statistics; all are reset in nfsd_file_cache_shutdown() */
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
static DEFINE_PER_CPU(unsigned long, nfsd_file_allocations);
static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);

/* Per-net list of nfsd_files waiting to be freed, protected by @lock */
struct nfsd_fcache_disposal {
	spinlock_t lock;
	struct list_head freeme;
};

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct list_lru			nfsd_file_lru;
static unsigned long			nfsd_file_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static struct delayed_work		nfsd_filecache_laundrette;
static struct rhltable			nfsd_file_rhltable
						____cacheline_aligned_in_smp;

/*
 * Compare two credentials for cache-lookup purposes. They match when the
 * fsuid, the fsgid and the complete supplementary group list are equal.
 * If either side has no group_info, both must have none.
 */
static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i],
			    c2->group_info->gid[i]))
			return false;
	}
	return true;
}

/* The hash key is the nf_inode pointer; entries are linked via nf_rlist */
static const struct rhashtable_params nfsd_file_rhash_params = {
	.key_len		= sizeof_field(struct nfsd_file, nf_inode),
	.key_offset		= offsetof(struct nfsd_file, nf_inode),
	.head_offset		= offsetof(struct nfsd_file, nf_rlist),

	/*
	 * Start with a single page hash table to reduce resizing churn
	 * on light workloads.
	 */
	.min_size		= 256,
	.automatic_shrinking	= true,
};

/*
 * Queue the laundrette work to run after NFSD_LAUNDRETTE_DELAY, but only
 * while the cache is marked up.
 */
static void
nfsd_file_schedule_laundrette(void)
{
	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags))
		queue_delayed_work(system_unbound_wq, &nfsd_filecache_laundrette,
				   NFSD_LAUNDRETTE_DELAY);
}

/* RCU callback: drop the credential and return the nfsd_file to its slab */
static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

/* fsnotify ->free_mark callback: return the containing nfsd_file_mark to its slab */
static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

/* Take a reference on @nfm; returns NULL if its refcount already hit zero */
static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

/*
 * Drop a reference on @nfm. On the final put, detach the fsnotify mark
 * from the group and drop the reference held on the mark itself.
 */
static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}

/*
 * Return a referenced nfsd_file_mark for @inode, reusing the mark already
 * attached for nfsd's fsnotify group when one exists and is still live,
 * otherwise allocating and attaching a new one. The attach is retried
 * when it fails with -EEXIST. Returns NULL if allocation fails or the
 * attach fails with any other error.
 */
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct inode *inode)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;

	do {
		fsnotify_group_lock(nfsd_file_fsnotify_group);
		mark = fsnotify_find_inode_mark(inode,
						nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
			if (nfm) {
				/* Drop the lookup's mark reference; nfm_ref keeps it alive */
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else {
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
		}

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

/*
 * Allocate and initialise an nfsd_file for @inode in @net.
 *
 * The new object starts with a refcount of one, no struct file, no mark,
 * the current task's credential, and the HASHED and PENDING flags set
 * (plus GC when @want_gc is true). Note that HASHED is set here even
 * though the object has not been inserted into the hash table yet.
 *
 * Returns NULL if the slab allocation fails.
 */
static struct nfsd_file *
nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
		bool want_gc)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (unlikely(!nf))
		return NULL;

	this_cpu_inc(nfsd_file_allocations);
	INIT_LIST_HEAD(&nf->nf_lru);
	INIT_LIST_HEAD(&nf->nf_gc);
	nf->nf_birthtime = ktime_get();
	nf->nf_file = NULL;
	nf->nf_cred = get_current_cred();
	nf->nf_net = net;
	nf->nf_flags = want_gc ?
		BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) :
		BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING);
	nf->nf_inode = inode;
	refcount_set(&nf->nf_ref, 1);
	nf->nf_may = need;
	nf->nf_mark = NULL;
	return nf;
}

/**
 * nfsd_file_check_write_error - check for writeback errors on a file
 * @nf: nfsd_file to check for writeback errors
 *
 * Check whether a nfsd_file has an unseen error. Reset the write
 * verifier if so.
 */
static void
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if ((file->f_mode & FMODE_WRITE) &&
	    filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)))
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
}

/* Remove @nf from the hash table unconditionally (flag handled by caller) */
static void
nfsd_file_hash_remove(struct nfsd_file *nf)
{
	trace_nfsd_file_unhash(nf);
	rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
			nfsd_file_rhash_params);
}

/*
 * Clear NFSD_FILE_HASHED and remove @nf from the hash table. Returns true
 * only for the caller that actually cleared the flag.
 */
static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_hash_remove(nf);
		return true;
	}
	return false;
}

/*
 * Tear down an nfsd_file: update the release statistics, unhash it, drop
 * its mark, check for writeback errors and close its file (if any), then
 * free the object after an RCU grace period.
 */
static void
nfsd_file_free(struct nfsd_file *nf)
{
	s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));

	trace_nfsd_file_free(nf);

	this_cpu_inc(nfsd_file_releases);
	this_cpu_add(nfsd_file_total_age, age);

	nfsd_file_unhash(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		nfsd_file_check_write_error(nf);
		nfsd_filp_close(nf->nf_file);
	}

	/*
	 * If this item is still linked via nf_lru, that's a bug.
	 * WARN and leak it to preserve system stability.
	 */
	if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
		return;

	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
}

/*
 * Return true when @nf's file is open for write on a filesystem whose
 * export ops set EXPORT_OP_FLUSH_ON_CLOSE and its mapping still has pages
 * tagged dirty or under writeback. Dereferences nf->nf_file unconditionally.
 */
static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	/* File not open for write? */
	if (!(file->f_mode & FMODE_WRITE))
		return false;

	/*
	 * Some filesystems (e.g. NFS) flush all dirty data on close.
	 * On others, there is no need to wait for writeback.
	 */
	if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE))
		return false;

	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}


/*
 * Mark @nf as recently referenced and add it to the LRU. Returns true if
 * list_lru_add_obj() reports that the item was added.
 */
static bool nfsd_file_lru_add(struct nfsd_file *nf)
{
	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru)) {
		trace_nfsd_file_lru_add(nf);
		return true;
	}
	return false;
}

/* Take @nf off the LRU. Returns true if list_lru_del_obj() removed it. */
static bool nfsd_file_lru_remove(struct nfsd_file *nf)
{
	if (list_lru_del_obj(&nfsd_file_lru, &nf->nf_lru)) {
		trace_nfsd_file_lru_del(nf);
		return true;
	}
	return false;
}

/*
 * Take a reference on @nf. Returns @nf on success, or NULL when @nf is
 * NULL or its refcount has already dropped to zero.
 */
struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (nf && refcount_inc_not_zero(&nf->nf_ref))
		return nf;
	return NULL;
}

/**
 * nfsd_file_put - put the reference to a nfsd_file
 * @nf: nfsd_file of which to put the reference
 *
 * Put a reference to a nfsd_file. In the non-GC case, we just put the
 * reference immediately. In the GC case, if the reference would be
 * the last one, then put it on the LRU instead to be cleaned up later.
*/
void
nfsd_file_put(struct nfsd_file *nf)
{
	might_sleep();
	trace_nfsd_file_put(nf);

	/* Only hashed, garbage-collected files are candidates for the LRU */
	if (test_bit(NFSD_FILE_GC, &nf->nf_flags) &&
	    test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		/*
		 * If this is the last reference (nf_ref == 1), then try to
		 * transfer it to the LRU.
		 */
		if (refcount_dec_not_one(&nf->nf_ref))
			return;

		/* Try to add it to the LRU.  If that fails, decrement. */
		if (nfsd_file_lru_add(nf)) {
			/* If it's still hashed, we're done */
			if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
				nfsd_file_schedule_laundrette();
				return;
			}

			/*
			 * We're racing with unhashing, so try to remove it from
			 * the LRU. If removal fails, then someone else already
			 * has our reference.
			 */
			if (!nfsd_file_lru_remove(nf))
				return;
		}
	}
	/* Plain put: free the object when this was the last reference */
	if (refcount_dec_and_test(&nf->nf_ref))
		nfsd_file_free(nf);
}

/*
 * Free every nfsd_file linked on @dispose through nf_gc. Each entry is
 * unlinked before being handed to nfsd_file_free().
 */
static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_gc);
		list_del_init(&nf->nf_gc);
		nfsd_file_free(nf);
	}
}

/**
 * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list
 * @dispose: list of nfsd_files to be disposed
 *
 * Transfers each file to the "freeme" list for its nfsd_net, to eventually
 * be disposed of by the per-net garbage collector.
*/
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	while (!list_empty(dispose)) {
		struct nfsd_file *nf = list_first_entry(dispose,
						struct nfsd_file, nf_gc);
		struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
		struct nfsd_fcache_disposal *l = nn->fcache_disposal;

		spin_lock(&l->lock);
		list_move_tail(&nf->nf_gc, &l->freeme);
		spin_unlock(&l->lock);
		/* Wake the owning service once per file moved */
		svc_wake_up(nn->nfsd_serv);
	}
}

/**
 * nfsd_file_net_dispose - deal with nfsd_files waiting to be disposed.
 * @nn: nfsd_net in which to find files to be disposed.
 *
 * When files held open for nfsv3 are removed from the filecache, whether
 * due to memory pressure or garbage collection, they are queued to
 * a per-net-ns queue.  This function completes the disposal, either
 * directly or by waking another nfsd thread to help with the work.
 */
void nfsd_file_net_dispose(struct nfsd_net *nn)
{
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	/* Unlocked peek; the list is re-checked under the lock below */
	if (!list_empty(&l->freeme)) {
		LIST_HEAD(dispose);
		int i;

		/* Take at most 8 entries per call */
		spin_lock(&l->lock);
		for (i = 0; i < 8 && !list_empty(&l->freeme); i++)
			list_move(l->freeme.next, &dispose);
		spin_unlock(&l->lock);
		if (!list_empty(&l->freeme))
			/* Wake up another thread to share the work
			 * *before* doing any actual disposing.
			 */
			svc_wake_up(nn->nfsd_serv);
		nfsd_file_dispose_list(&dispose);
	}
}

/**
 * nfsd_file_lru_cb - Examine an entry on the LRU list
 * @item: LRU entry to examine
 * @lru: controlling LRU
 * @lock: LRU list lock (unused)
 * @arg: dispose list
 *
 * Return values:
 *   %LRU_REMOVED: @item was removed from the LRU
 *   %LRU_ROTATE: @item is to be moved to the LRU tail
 *   %LRU_SKIP: @item cannot be evicted
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/* We should only be dealing with GC entries here */
	WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags));

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf)) {
		trace_nfsd_file_gc_writeback(nf);
		return LRU_SKIP;
	}

	/* If it was recently added to the list, skip it */
	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
		trace_nfsd_file_gc_referenced(nf);
		return LRU_ROTATE;
	}

	/*
	 * Put the reference held on behalf of the LRU. If it wasn't the last
	 * one, then just remove it from the LRU and ignore it.
	 */
	if (!refcount_dec_and_test(&nf->nf_ref)) {
		trace_nfsd_file_gc_in_use(nf);
		list_lru_isolate(lru, &nf->nf_lru);
		return LRU_REMOVED;
	}

	/* Refcount went to zero. Unhash it and queue it to the dispose list */
	nfsd_file_unhash(nf);
	list_lru_isolate(lru, &nf->nf_lru);
	list_add(&nf->nf_gc, head);
	this_cpu_inc(nfsd_file_evictions);
	trace_nfsd_file_gc_disposed(nf);
	return LRU_REMOVED;
}

/*
 * Walk the LRU, visiting up to as many entries as it currently holds,
 * and hand everything the callback evicted to the per-net disposal lists.
 */
static void
nfsd_file_gc(void)
{
	LIST_HEAD(dispose);
	unsigned long ret;

	ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
			    &dispose, list_lru_count(&nfsd_file_lru));
	trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
	nfsd_file_dispose_list_delayed(&dispose);
}

/* Laundrette work function: run one GC pass, re-arm while the LRU is non-empty */
static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	if (list_lru_count(&nfsd_file_lru))
		nfsd_file_schedule_laundrette();
}

/* Shrinker ->count_objects: number of entries currently on the LRU */
static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

/*
 * Shrinker ->scan_objects: walk the LRU as directed by @sc, queue evicted
 * entries for delayed disposal, and return the walk's result.
 */
static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	LIST_HEAD(dispose);
	unsigned long ret;

	ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
				   nfsd_file_lru_cb, &dispose);
	trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
	nfsd_file_dispose_list_delayed(&dispose);
	return ret;
}

static struct shrinker *nfsd_file_shrinker;

/**
 * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
 * @nf: nfsd_file to attempt to queue
 * @dispose: private list to queue successfully-put objects
 *
 * Unhash an nfsd_file, try to get a reference to it, and then put that
 * reference. If it's the last reference, queue it to the dispose list.
564 */ 565 static void 566 nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) 567 __must_hold(RCU) 568 { 569 int decrement = 1; 570 571 /* If we raced with someone else unhashing, ignore it */ 572 if (!nfsd_file_unhash(nf)) 573 return; 574 575 /* If we can't get a reference, ignore it */ 576 if (!nfsd_file_get(nf)) 577 return; 578 579 /* Extra decrement if we remove from the LRU */ 580 if (nfsd_file_lru_remove(nf)) 581 ++decrement; 582 583 /* If refcount goes to 0, then put on the dispose list */ 584 if (refcount_sub_and_test(decrement, &nf->nf_ref)) { 585 list_add(&nf->nf_gc, dispose); 586 trace_nfsd_file_closing(nf); 587 } 588 } 589 590 /** 591 * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode 592 * @inode: inode on which to close out nfsd_files 593 * @dispose: list on which to gather nfsd_files to close out 594 * 595 * An nfsd_file represents a struct file being held open on behalf of nfsd. 596 * An open file however can block other activity (such as leases), or cause 597 * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). 598 * 599 * This function is intended to find open nfsd_files when this sort of 600 * conflicting access occurs and then attempt to close those files out. 601 * 602 * Populates the dispose list with entries that have already had their 603 * refcounts go to zero. The actual free of an nfsd_file can be expensive, 604 * so we leave it up to the caller whether it wants to wait or not. 
605 */ 606 static void 607 nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) 608 { 609 struct rhlist_head *tmp, *list; 610 struct nfsd_file *nf; 611 612 rcu_read_lock(); 613 list = rhltable_lookup(&nfsd_file_rhltable, &inode, 614 nfsd_file_rhash_params); 615 rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) { 616 if (!test_bit(NFSD_FILE_GC, &nf->nf_flags)) 617 continue; 618 nfsd_file_cond_queue(nf, dispose); 619 } 620 rcu_read_unlock(); 621 } 622 623 /** 624 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file 625 * @inode: inode of the file to attempt to remove 626 * 627 * Close out any open nfsd_files that can be reaped for @inode. The 628 * actual freeing is deferred to the dispose_list_delayed infrastructure. 629 * 630 * This is used by the fsnotify callbacks and setlease notifier. 631 */ 632 static void 633 nfsd_file_close_inode(struct inode *inode) 634 { 635 LIST_HEAD(dispose); 636 637 nfsd_file_queue_for_close(inode, &dispose); 638 nfsd_file_dispose_list_delayed(&dispose); 639 } 640 641 /** 642 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file 643 * @inode: inode of the file to attempt to remove 644 * 645 * Close out any open nfsd_files that can be reaped for @inode. The 646 * nfsd_files are closed out synchronously. 647 * 648 * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames 649 * when reexporting NFS. 
650 */ 651 void 652 nfsd_file_close_inode_sync(struct inode *inode) 653 { 654 struct nfsd_file *nf; 655 LIST_HEAD(dispose); 656 657 trace_nfsd_file_close(inode); 658 659 nfsd_file_queue_for_close(inode, &dispose); 660 while (!list_empty(&dispose)) { 661 nf = list_first_entry(&dispose, struct nfsd_file, nf_gc); 662 list_del_init(&nf->nf_gc); 663 nfsd_file_free(nf); 664 } 665 } 666 667 static int 668 nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, 669 void *data) 670 { 671 struct file_lease *fl = data; 672 673 /* Only close files for F_SETLEASE leases */ 674 if (fl->c.flc_flags & FL_LEASE) 675 nfsd_file_close_inode(file_inode(fl->c.flc_file)); 676 return 0; 677 } 678 679 static struct notifier_block nfsd_file_lease_notifier = { 680 .notifier_call = nfsd_file_lease_notifier_call, 681 }; 682 683 static int 684 nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, 685 struct inode *inode, struct inode *dir, 686 const struct qstr *name, u32 cookie) 687 { 688 if (WARN_ON_ONCE(!inode)) 689 return 0; 690 691 trace_nfsd_file_fsnotify_handle_event(inode, mask); 692 693 /* Should be no marks on non-regular files */ 694 if (!S_ISREG(inode->i_mode)) { 695 WARN_ON_ONCE(1); 696 return 0; 697 } 698 699 /* don't close files if this was not the last link */ 700 if (mask & FS_ATTRIB) { 701 if (inode->i_nlink) 702 return 0; 703 } 704 705 nfsd_file_close_inode(inode); 706 return 0; 707 } 708 709 710 static const struct fsnotify_ops nfsd_file_fsnotify_ops = { 711 .handle_inode_event = nfsd_file_fsnotify_handle_event, 712 .free_mark = nfsd_file_mark_free, 713 }; 714 715 int 716 nfsd_file_cache_init(void) 717 { 718 int ret; 719 720 lockdep_assert_held(&nfsd_mutex); 721 if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) 722 return 0; 723 724 ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params); 725 if (ret) 726 return ret; 727 728 ret = -ENOMEM; 729 nfsd_file_slab = KMEM_CACHE(nfsd_file, 0); 730 if (!nfsd_file_slab) { 
731 pr_err("nfsd: unable to create nfsd_file_slab\n"); 732 goto out_err; 733 } 734 735 nfsd_file_mark_slab = KMEM_CACHE(nfsd_file_mark, 0); 736 if (!nfsd_file_mark_slab) { 737 pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); 738 goto out_err; 739 } 740 741 ret = list_lru_init(&nfsd_file_lru); 742 if (ret) { 743 pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); 744 goto out_err; 745 } 746 747 nfsd_file_shrinker = shrinker_alloc(0, "nfsd-filecache"); 748 if (!nfsd_file_shrinker) { 749 ret = -ENOMEM; 750 pr_err("nfsd: failed to allocate nfsd_file_shrinker\n"); 751 goto out_lru; 752 } 753 754 nfsd_file_shrinker->count_objects = nfsd_file_lru_count; 755 nfsd_file_shrinker->scan_objects = nfsd_file_lru_scan; 756 nfsd_file_shrinker->seeks = 1; 757 758 shrinker_register(nfsd_file_shrinker); 759 760 ret = lease_register_notifier(&nfsd_file_lease_notifier); 761 if (ret) { 762 pr_err("nfsd: unable to register lease notifier: %d\n", ret); 763 goto out_shrinker; 764 } 765 766 nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, 767 FSNOTIFY_GROUP_NOFS); 768 if (IS_ERR(nfsd_file_fsnotify_group)) { 769 pr_err("nfsd: unable to create fsnotify group: %ld\n", 770 PTR_ERR(nfsd_file_fsnotify_group)); 771 ret = PTR_ERR(nfsd_file_fsnotify_group); 772 nfsd_file_fsnotify_group = NULL; 773 goto out_notifier; 774 } 775 776 INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); 777 out: 778 return ret; 779 out_notifier: 780 lease_unregister_notifier(&nfsd_file_lease_notifier); 781 out_shrinker: 782 shrinker_free(nfsd_file_shrinker); 783 out_lru: 784 list_lru_destroy(&nfsd_file_lru); 785 out_err: 786 kmem_cache_destroy(nfsd_file_slab); 787 nfsd_file_slab = NULL; 788 kmem_cache_destroy(nfsd_file_mark_slab); 789 nfsd_file_mark_slab = NULL; 790 rhltable_destroy(&nfsd_file_rhltable); 791 goto out; 792 } 793 794 /** 795 * __nfsd_file_cache_purge: clean out the cache for shutdown 796 * @net: net-namespace to shut down the cache (may be NULL) 797 * 798 
* Walk the nfsd_file cache and close out any that match @net. If @net is NULL,
 * then close out everything. Called when an nfsd instance is being shut down,
 * and when the exports table is flushed.
 */
static void
__nfsd_file_cache_purge(struct net *net)
{
	struct rhashtable_iter iter;
	struct nfsd_file *nf;
	LIST_HEAD(dispose);

	rhltable_walk_enter(&nfsd_file_rhltable, &iter);
	do {
		rhashtable_walk_start(&iter);

		nf = rhashtable_walk_next(&iter);
		while (!IS_ERR_OR_NULL(nf)) {
			if (!net || nf->nf_net == net)
				nfsd_file_cond_queue(nf, &dispose);
			nf = rhashtable_walk_next(&iter);
		}

		rhashtable_walk_stop(&iter);
	/* Restart the walk when the iterator reports -EAGAIN */
	} while (nf == ERR_PTR(-EAGAIN));
	rhashtable_walk_exit(&iter);

	nfsd_file_dispose_list(&dispose);
}

/* Allocate an empty per-net disposal list; returns NULL on allocation failure */
static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(void)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

/* Free whatever is still queued on @l (without taking its lock), then @l itself */
static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	nfsd_file_dispose_list(&l->freeme);
	kfree(l);
}

/* Release the disposal list belonging to @net; the nn pointer is not cleared */
static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	nfsd_free_fcache_disposal(l);
}

/*
 * Per-net startup: allocate the disposal list for @net.
 * Returns 0 on success or -ENOMEM.
 */
int
nfsd_file_cache_start_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	nn->fcache_disposal = nfsd_alloc_fcache_disposal();
	return nn->fcache_disposal ? 0 : -ENOMEM;
}

/**
 * nfsd_file_cache_purge - Remove all cache items associated with @net
 * @net: target net namespace
 *
 */
void
nfsd_file_cache_purge(struct net *net)
{
	lockdep_assert_held(&nfsd_mutex);
	/* Nothing to purge unless the global cache is up */
	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
		__nfsd_file_cache_purge(net);
}

/* Per-net shutdown: purge @net's cache entries, then free its disposal list */
void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

/*
 * Tear down the global file cache. Must be called with nfsd_mutex held;
 * does nothing if the cache is not up. Unregisters the lease notifier
 * and shrinker, stops the laundrette, purges every entry, destroys the
 * LRU, slabs, fsnotify group and hash table, and zeroes the per-CPU
 * statistics.
 */
void
nfsd_file_cache_shutdown(void)
{
	int i;

	lockdep_assert_held(&nfsd_mutex);
	if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
		return;

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	shrinker_free(nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	__nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	/* Let pending call_rcu() frees finish before the slab is destroyed */
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	rhltable_destroy(&nfsd_file_rhltable);

	for_each_possible_cpu(i) {
		per_cpu(nfsd_file_cache_hits, i) = 0;
		per_cpu(nfsd_file_acquisitions, i) = 0;
		per_cpu(nfsd_file_allocations, i) = 0;
		per_cpu(nfsd_file_releases, i) = 0;
		per_cpu(nfsd_file_total_age, i) = 0;
		per_cpu(nfsd_file_evictions, i) = 0;
	}
}

/*
 * Search the hash chain for @inode for a hashed nfsd_file whose access
 * mode, net namespace, credential and GC-ness all match, and return it
 * with a new reference. Returns NULL if no such entry can be referenced.
 * Iterates with the _rcu list walker; both callers in this file hold
 * rcu_read_lock() around the call.
 */
static struct nfsd_file *
nfsd_file_lookup_locked(const struct net *net, const struct cred *cred,
			struct inode *inode, unsigned char need,
			bool want_gc)
{
	struct rhlist_head *tmp, *list;
	struct nfsd_file *nf;

	list = rhltable_lookup(&nfsd_file_rhltable, &inode,
			       nfsd_file_rhash_params);
	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
		if (nf->nf_may != need)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, cred))
			continue;
		if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc)
			continue;
		if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
			continue;

		/* Refcount already zero: this entry is dying, keep looking */
		if (!nfsd_file_get(nf))
			continue;
		return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode to check
 *
 * The lookup matches inodes in all net namespaces and is atomic wrt
 * nfsd_file_acquire().
 *
 * Return values:
 *   %true: filecache contains at least one file matching this inode
 *   %false: filecache contains no files matching this inode
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	struct rhlist_head *tmp, *list;
	struct nfsd_file *nf;
	bool ret = false;

	rcu_read_lock();
	list = rhltable_lookup(&nfsd_file_rhltable, &inode,
			       nfsd_file_rhash_params);
	/* Only garbage-collected entries count as "cached" here */
	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist)
		if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
			ret = true;
			break;
		}
	rcu_read_unlock();

	trace_nfsd_file_is_cached(inode, (int)ret);
	return ret;
}

/*
 * Common implementation behind the nfsd_file_acquire*() entry points.
 *
 * Verifies @fhp, then looks for a matching cached nfsd_file. On a hit it
 * waits for any in-progress construction to finish. On a miss it
 * allocates a new entry, inserts it into the hash table and opens the
 * file, or adopts @file when the caller supplied one. A failed
 * construction seen by a waiter, and an -EOPENSTALE open, are each
 * retried once from the top.
 *
 * Returns nfs_ok with *@pnf set to a referenced nfsd_file, or an nfsstat
 * error, in which case *@pnf is not written.
 */
static __be32
nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		     unsigned int may_flags, struct file *file,
		     struct nfsd_file **pnf, bool want_gc)
{
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *new, *nf;
	bool stale_retry = true;
	bool open_retry = true;
	struct inode *inode;
	__be32 status;
	int ret;

retry:
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;
	inode = d_inode(fhp->fh_dentry);

	rcu_read_lock();
	nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc);
	rcu_read_unlock();

	if (nf) {
		/*
		 * If the nf is on the LRU then it holds an extra reference
		 * that must be put if it's removed. It had better not be
		 * the last one however, since we should hold another.
		 */
		if (nfsd_file_lru_remove(nf))
			WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
		goto wait_for_construction;
	}

	new = nfsd_file_alloc(net, inode, need, want_gc);
	if (!new) {
		status = nfserr_jukebox;
		goto out;
	}

	/* Repeat the lookup under i_lock so lookup and insert are not raced */
	rcu_read_lock();
	spin_lock(&inode->i_lock);
	nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc);
	if (unlikely(nf)) {
		spin_unlock(&inode->i_lock);
		rcu_read_unlock();
		nfsd_file_free(new);
		goto wait_for_construction;
	}
	nf = new;
	ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist,
			      nfsd_file_rhash_params);
	spin_unlock(&inode->i_lock);
	rcu_read_unlock();
	if (likely(ret == 0))
		goto open_file;

	trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret);
	status = nfserr_jukebox;
	goto construction_err;

wait_for_construction:
	/* Sleep until whoever is constructing this entry clears PENDING */
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf);
		if (!open_retry) {
			status = nfserr_jukebox;
			goto construction_err;
		}
		/* Drop our reference to the failed entry and retry once */
		nfsd_file_put(nf);
		open_retry = false;
		fh_put(fhp);
		goto retry;
	}
	this_cpu_inc(nfsd_file_cache_hits);

	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
	if (status != nfs_ok) {
		nfsd_file_put(nf);
		nf = NULL;
	}

out:
	if (status == nfs_ok) {
		this_cpu_inc(nfsd_file_acquisitions);
		nfsd_file_check_write_error(nf);
		*pnf = nf;
	}
	trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
	return status;

open_file:
	trace_nfsd_file_alloc(nf);
	nf->nf_mark = nfsd_file_mark_find_or_create(inode);
	if (nf->nf_mark) {
		if (file) {
			/* Adopt the caller's already-open file */
			get_file(file);
			nf->nf_file = file;
			status = nfs_ok;
			trace_nfsd_file_opened(nf, status);
		} else {
			ret = nfsd_open_verified(rqstp, fhp, may_flags,
						 &nf->nf_file);
			if (ret == -EOPENSTALE && stale_retry) {
				/*
				 * Abandon this entry (waking any waiters)
				 * and retry the whole acquire once.
				 */
				stale_retry = false;
				nfsd_file_unhash(nf);
				clear_and_wake_up_bit(NFSD_FILE_PENDING,
						      &nf->nf_flags);
				if (refcount_dec_and_test(&nf->nf_ref))
					nfsd_file_free(nf);
				nf = NULL;
				fh_put(fhp);
				goto retry;
			}
			status = nfserrno(ret);
			trace_nfsd_file_open(nf, status);
		}
	} else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0)
		nfsd_file_unhash(nf);
	clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	if (status == nfs_ok)
		goto out;

construction_err:
	if (refcount_dec_and_test(&nf->nf_ref))
		nfsd_file_free(nf);
	nf = NULL;
	goto out;
}

/**
 * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file to be opened
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * The nfsd_file object returned by this API is reference-counted
 * and garbage-collected. The object is retained for a few
 * seconds after the final nfsd_file_put() in case the caller
 * wants to re-use it.
 *
 * Return values:
 *   %nfs_ok - @pnf points to an nfsd_file with its reference
 *   count boosted.
 *
 * On error, an nfsstat value in network byte order is returned.
 */
__be32
nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
		     unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true);
}

/**
 * nfsd_file_acquire - Get a struct nfsd_file with an open file
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file to be opened
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * The nfsd_file object returned by this API is reference-counted
 * but not garbage-collected. The object is unhashed after the
 * final nfsd_file_put().
 *
 * Return values:
 *   %nfs_ok - @pnf points to an nfsd_file with its reference
 *   count boosted.
 *
 * On error, an nfsstat value in network byte order is returned.
1165 */ 1166 __be32 1167 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 1168 unsigned int may_flags, struct nfsd_file **pnf) 1169 { 1170 return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); 1171 } 1172 1173 /** 1174 * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file 1175 * @rqstp: the RPC transaction being executed 1176 * @fhp: the NFS filehandle of the file just created 1177 * @may_flags: NFSD_MAY_ settings for the file 1178 * @file: cached, already-open file (may be NULL) 1179 * @pnf: OUT: new or found "struct nfsd_file" object 1180 * 1181 * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, 1182 * and @file is non-NULL, use it to instantiate a new nfsd_file instead of 1183 * opening a new one. 1184 * 1185 * Return values: 1186 * %nfs_ok - @pnf points to an nfsd_file with its reference 1187 * count boosted. 1188 * 1189 * On error, an nfsstat value in network byte order is returned. 1190 */ 1191 __be32 1192 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, 1193 unsigned int may_flags, struct file *file, 1194 struct nfsd_file **pnf) 1195 { 1196 return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); 1197 } 1198 1199 /* 1200 * Note that fields may be added, removed or reordered in the future. Programs 1201 * scraping this file for info should test the labels to ensure they're 1202 * getting the correct field. 
1203 */ 1204 int nfsd_file_cache_stats_show(struct seq_file *m, void *v) 1205 { 1206 unsigned long allocations = 0, releases = 0, evictions = 0; 1207 unsigned long hits = 0, acquisitions = 0; 1208 unsigned int i, count = 0, buckets = 0; 1209 unsigned long lru = 0, total_age = 0; 1210 1211 /* Serialize with server shutdown */ 1212 mutex_lock(&nfsd_mutex); 1213 if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) { 1214 struct bucket_table *tbl; 1215 struct rhashtable *ht; 1216 1217 lru = list_lru_count(&nfsd_file_lru); 1218 1219 rcu_read_lock(); 1220 ht = &nfsd_file_rhltable.ht; 1221 count = atomic_read(&ht->nelems); 1222 tbl = rht_dereference_rcu(ht->tbl, ht); 1223 buckets = tbl->size; 1224 rcu_read_unlock(); 1225 } 1226 mutex_unlock(&nfsd_mutex); 1227 1228 for_each_possible_cpu(i) { 1229 hits += per_cpu(nfsd_file_cache_hits, i); 1230 acquisitions += per_cpu(nfsd_file_acquisitions, i); 1231 allocations += per_cpu(nfsd_file_allocations, i); 1232 releases += per_cpu(nfsd_file_releases, i); 1233 total_age += per_cpu(nfsd_file_total_age, i); 1234 evictions += per_cpu(nfsd_file_evictions, i); 1235 } 1236 1237 seq_printf(m, "total inodes: %u\n", count); 1238 seq_printf(m, "hash buckets: %u\n", buckets); 1239 seq_printf(m, "lru entries: %lu\n", lru); 1240 seq_printf(m, "cache hits: %lu\n", hits); 1241 seq_printf(m, "acquisitions: %lu\n", acquisitions); 1242 seq_printf(m, "allocations: %lu\n", allocations); 1243 seq_printf(m, "releases: %lu\n", releases); 1244 seq_printf(m, "evictions: %lu\n", evictions); 1245 if (releases) 1246 seq_printf(m, "mean age (ms): %ld\n", total_age / releases); 1247 else 1248 seq_printf(m, "mean age (ms): -\n"); 1249 return 0; 1250 } 1251