1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * The NFSD open file cache. 4 * 5 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com> 6 * 7 * An nfsd_file object is a per-file collection of open state that binds 8 * together: 9 * - a struct file * 10 * - a user credential 11 * - a network namespace 12 * - a read-ahead context 13 * - monitoring for writeback errors 14 * 15 * nfsd_file objects are reference-counted. Consumers acquire a new 16 * object via the nfsd_file_acquire API. They manage their interest in 17 * the acquired object, and hence the object's reference count, via 18 * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file 19 * object: 20 * 21 * * non-garbage-collected: When a consumer wants to precisely control 22 * the lifetime of a file's open state, it acquires a non-garbage- 23 * collected nfsd_file. The final nfsd_file_put releases the open 24 * state immediately. 25 * 26 * * garbage-collected: When a consumer does not control the lifetime 27 * of open state, it acquires a garbage-collected nfsd_file. The 28 * final nfsd_file_put allows the open state to linger for a period 29 * during which it may be re-used. 30 */ 31 32 #include <linux/hash.h> 33 #include <linux/slab.h> 34 #include <linux/file.h> 35 #include <linux/pagemap.h> 36 #include <linux/sched.h> 37 #include <linux/list_lru.h> 38 #include <linux/fsnotify_backend.h> 39 #include <linux/fsnotify.h> 40 #include <linux/seq_file.h> 41 #include <linux/rhashtable.h> 42 43 #include "vfs.h" 44 #include "nfsd.h" 45 #include "nfsfh.h" 46 #include "netns.h" 47 #include "filecache.h" 48 #include "trace.h" 49 50 #define NFSD_LAUNDRETTE_DELAY (2 * HZ) 51 52 #define NFSD_FILE_CACHE_UP (0) 53 54 /* We only care about NFSD_MAY_READ/WRITE for this cache */ 55 #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) 56 57 static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); 58 static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); 59 static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); 60 static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); 61 static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); 62 63 struct nfsd_fcache_disposal { 64 struct work_struct work; 65 spinlock_t lock; 66 struct list_head freeme; 67 }; 68 69 static struct workqueue_struct *nfsd_filecache_wq __read_mostly; 70 71 static struct kmem_cache *nfsd_file_slab; 72 static struct kmem_cache *nfsd_file_mark_slab; 73 static struct list_lru nfsd_file_lru; 74 static unsigned long nfsd_file_flags; 75 static struct fsnotify_group *nfsd_file_fsnotify_group; 76 static struct delayed_work nfsd_filecache_laundrette; 77 static struct rhashtable nfsd_file_rhash_tbl 78 ____cacheline_aligned_in_smp; 79 80 enum nfsd_file_lookup_type { 81 NFSD_FILE_KEY_INODE, 82 NFSD_FILE_KEY_FULL, 83 }; 84 85 struct nfsd_file_lookup_key { 86 struct inode *inode; 87 struct net *net; 88 const struct cred *cred; 89 unsigned char need; 90 bool gc; 91 enum nfsd_file_lookup_type type; 92 }; 93 94 /* 95 * The returned hash value is based solely on the address of an in-code 96 * inode, a pointer to a slab-allocated object. The entropy in such a 97 * pointer is concentrated in its middle bits. 98 */ 99 static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed) 100 { 101 unsigned long ptr = (unsigned long)inode; 102 u32 k; 103 104 k = ptr >> L1_CACHE_SHIFT; 105 k &= 0x00ffffff; 106 return jhash2(&k, 1, seed); 107 } 108 109 /** 110 * nfsd_file_key_hashfn - Compute the hash value of a lookup key 111 * @data: key on which to compute the hash value 112 * @len: rhash table's key_len parameter (unused) 113 * @seed: rhash table's random seed of the day 114 * 115 * Return value: 116 * Computed 32-bit hash value 117 */ 118 static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed) 119 { 120 const struct nfsd_file_lookup_key *key = data; 121 122 return nfsd_file_inode_hash(key->inode, seed); 123 } 124 125 /** 126 * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file 127 * @data: object on which to compute the hash value 128 * @len: rhash table's key_len parameter (unused) 129 * @seed: rhash table's random seed of the day 130 * 131 * Return value: 132 * Computed 32-bit hash value 133 */ 134 static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed) 135 { 136 const struct nfsd_file *nf = data; 137 138 return nfsd_file_inode_hash(nf->nf_inode, seed); 139 } 140 141 static bool 142 nfsd_match_cred(const struct cred *c1, const struct cred *c2) 143 { 144 int i; 145 146 if (!uid_eq(c1->fsuid, c2->fsuid)) 147 return false; 148 if (!gid_eq(c1->fsgid, c2->fsgid)) 149 return false; 150 if (c1->group_info == NULL || c2->group_info == NULL) 151 return c1->group_info == c2->group_info; 152 if (c1->group_info->ngroups != c2->group_info->ngroups) 153 return false; 154 for (i = 0; i < c1->group_info->ngroups; i++) { 155 if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) 156 return false; 157 } 158 return true; 159 } 160 161 /** 162 * nfsd_file_obj_cmpfn - Match a cache item against search criteria 163 * @arg: search criteria 164 * @ptr: cache item to check 165 * 166 * Return values: 167 * %0 - Item matches search criteria 168 * %1 - Item does not match search criteria 169 */ 170 static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, 171 const void *ptr) 172 { 173 const struct nfsd_file_lookup_key *key = arg->key; 174 const struct nfsd_file *nf = ptr; 175 176 switch (key->type) { 177 case NFSD_FILE_KEY_INODE: 178 if (nf->nf_inode != key->inode) 179 return 1; 180 break; 181 case NFSD_FILE_KEY_FULL: 182 if (nf->nf_inode != key->inode) 183 return 1; 184 if (nf->nf_may != key->need) 185 return 1; 186 if (nf->nf_net != key->net) 187 return 1; 188 if (!nfsd_match_cred(nf->nf_cred, key->cred)) 189 return 1; 190 if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) 191 return 1; 192 if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) 193 return 1; 194 break; 195 } 196 return 0; 197 } 198 199 static const struct rhashtable_params nfsd_file_rhash_params = { 200 .key_len = sizeof_field(struct nfsd_file, nf_inode), 201 .key_offset = offsetof(struct nfsd_file, nf_inode), 202 .head_offset = offsetof(struct nfsd_file, nf_rhash), 203 .hashfn = nfsd_file_key_hashfn, 204 .obj_hashfn = nfsd_file_obj_hashfn, 205 .obj_cmpfn = nfsd_file_obj_cmpfn, 206 /* Reduce resizing churn on light workloads */ 207 .min_size = 512, /* buckets */ 208 .automatic_shrinking = true, 209 }; 210 211 static void 212 nfsd_file_schedule_laundrette(void) 213 { 214 if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) 215 queue_delayed_work(system_wq, &nfsd_filecache_laundrette, 216 NFSD_LAUNDRETTE_DELAY); 217 } 218 219 static void 220 nfsd_file_slab_free(struct rcu_head *rcu) 221 { 222 struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu); 223 224 put_cred(nf->nf_cred); 225 kmem_cache_free(nfsd_file_slab, nf); 226 } 227 228 static void 229 nfsd_file_mark_free(struct fsnotify_mark *mark) 230 { 231 struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark, 232 nfm_mark); 233 234 kmem_cache_free(nfsd_file_mark_slab, nfm); 235 } 236 237 static struct nfsd_file_mark * 238 nfsd_file_mark_get(struct nfsd_file_mark *nfm) 239 { 240 if (!refcount_inc_not_zero(&nfm->nfm_ref)) 241 return NULL; 242 return nfm; 243 } 244 245 static void 246 nfsd_file_mark_put(struct nfsd_file_mark *nfm) 247 { 248 if (refcount_dec_and_test(&nfm->nfm_ref)) { 249 fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group); 250 fsnotify_put_mark(&nfm->nfm_mark); 251 } 252 } 253 254 static struct nfsd_file_mark * 255 nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) 256 { 257 int err; 258 struct fsnotify_mark *mark; 259 struct nfsd_file_mark *nfm = NULL, *new; 260 261 do { 262 fsnotify_group_lock(nfsd_file_fsnotify_group); 263 mark = fsnotify_find_mark(&inode->i_fsnotify_marks, 264 nfsd_file_fsnotify_group); 265 if (mark) { 266 nfm = nfsd_file_mark_get(container_of(mark, 267 struct nfsd_file_mark, 268 nfm_mark)); 269 fsnotify_group_unlock(nfsd_file_fsnotify_group); 270 if (nfm) { 271 fsnotify_put_mark(mark); 272 break; 273 } 274 /* Avoid soft lockup race with nfsd_file_mark_put() */ 275 fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); 276 fsnotify_put_mark(mark); 277 } else { 278 fsnotify_group_unlock(nfsd_file_fsnotify_group); 279 } 280 281 /* allocate a new nfm */ 282 new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); 283 if (!new) 284 return NULL; 285 fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group); 286 new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF; 287 refcount_set(&new->nfm_ref, 1); 288 289 err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0); 290 291 /* 292 * If the add was successful, then return the object. 293 * Otherwise, we need to put the reference we hold on the 294 * nfm_mark. The fsnotify code will take a reference and put 295 * it on failure, so we can't just free it directly. It's also 296 * not safe to call fsnotify_destroy_mark on it as the 297 * mark->group will be NULL. Thus, we can't let the nfm_ref 298 * counter drive the destruction at this point. 299 */ 300 if (likely(!err)) 301 nfm = new; 302 else 303 fsnotify_put_mark(&new->nfm_mark); 304 } while (unlikely(err == -EEXIST)); 305 306 return nfm; 307 } 308 309 static struct nfsd_file * 310 nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) 311 { 312 struct nfsd_file *nf; 313 314 nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); 315 if (nf) { 316 INIT_LIST_HEAD(&nf->nf_lru); 317 nf->nf_birthtime = ktime_get(); 318 nf->nf_file = NULL; 319 nf->nf_cred = get_current_cred(); 320 nf->nf_net = key->net; 321 nf->nf_flags = 0; 322 __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); 323 __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); 324 if (key->gc) 325 __set_bit(NFSD_FILE_GC, &nf->nf_flags); 326 nf->nf_inode = key->inode; 327 refcount_set(&nf->nf_ref, 1); 328 nf->nf_may = key->need; 329 nf->nf_mark = NULL; 330 } 331 return nf; 332 } 333 334 static void 335 nfsd_file_fsync(struct nfsd_file *nf) 336 { 337 struct file *file = nf->nf_file; 338 int ret; 339 340 if (!file || !(file->f_mode & FMODE_WRITE)) 341 return; 342 ret = vfs_fsync(file, 1); 343 trace_nfsd_file_fsync(nf, ret); 344 if (ret) 345 nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); 346 } 347 348 static int 349 nfsd_file_check_write_error(struct nfsd_file *nf) 350 { 351 struct file *file = nf->nf_file; 352 353 if (!file || !(file->f_mode & FMODE_WRITE)) 354 return 0; 355 return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); 356 } 357 358 static void 359 nfsd_file_hash_remove(struct nfsd_file *nf) 360 { 361 trace_nfsd_file_unhash(nf); 362 363 if (nfsd_file_check_write_error(nf)) 364 nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); 365 rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, 366 nfsd_file_rhash_params); 367 } 368 369 static bool 370 nfsd_file_unhash(struct nfsd_file *nf) 371 { 372 if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 373 nfsd_file_hash_remove(nf); 374 return true; 375 } 376 return false; 377 } 378 379 static void 380 nfsd_file_free(struct nfsd_file *nf) 381 { 382 s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); 383 384 trace_nfsd_file_free(nf); 385 386 this_cpu_inc(nfsd_file_releases); 387 this_cpu_add(nfsd_file_total_age, age); 388 389 nfsd_file_unhash(nf); 390 391 /* 392 * We call fsync here in order to catch writeback errors. It's not 393 * strictly required by the protocol, but an nfsd_file could get 394 * evicted from the cache before a COMMIT comes in. If another 395 * task were to open that file in the interim and scrape the error, 396 * then the client may never see it. By calling fsync here, we ensure 397 * that writeback happens before the entry is freed, and that any 398 * errors reported result in the write verifier changing. 399 */ 400 nfsd_file_fsync(nf); 401 402 if (nf->nf_mark) 403 nfsd_file_mark_put(nf->nf_mark); 404 if (nf->nf_file) { 405 get_file(nf->nf_file); 406 filp_close(nf->nf_file, NULL); 407 fput(nf->nf_file); 408 } 409 410 /* 411 * If this item is still linked via nf_lru, that's a bug. 412 * WARN and leak it to preserve system stability. 413 */ 414 if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) 415 return; 416 417 call_rcu(&nf->nf_rcu, nfsd_file_slab_free); 418 } 419 420 static bool 421 nfsd_file_check_writeback(struct nfsd_file *nf) 422 { 423 struct file *file = nf->nf_file; 424 struct address_space *mapping; 425 426 if (!file || !(file->f_mode & FMODE_WRITE)) 427 return false; 428 mapping = file->f_mapping; 429 return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || 430 mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); 431 } 432 433 static bool nfsd_file_lru_add(struct nfsd_file *nf) 434 { 435 set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); 436 if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { 437 trace_nfsd_file_lru_add(nf); 438 return true; 439 } 440 return false; 441 } 442 443 static bool nfsd_file_lru_remove(struct nfsd_file *nf) 444 { 445 if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { 446 trace_nfsd_file_lru_del(nf); 447 return true; 448 } 449 return false; 450 } 451 452 struct nfsd_file * 453 nfsd_file_get(struct nfsd_file *nf) 454 { 455 if (likely(refcount_inc_not_zero(&nf->nf_ref))) 456 return nf; 457 return NULL; 458 } 459 460 /** 461 * nfsd_file_put - put the reference to a nfsd_file 462 * @nf: nfsd_file of which to put the reference 463 * 464 * Put a reference to a nfsd_file. In the non-GC case, we just put the 465 * reference immediately. In the GC case, if the reference would be 466 * the last one, the put it on the LRU instead to be cleaned up later. 467 */ 468 void 469 nfsd_file_put(struct nfsd_file *nf) 470 { 471 might_sleep(); 472 trace_nfsd_file_put(nf); 473 474 if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && 475 test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 476 /* 477 * If this is the last reference (nf_ref == 1), then try to 478 * transfer it to the LRU. 479 */ 480 if (refcount_dec_not_one(&nf->nf_ref)) 481 return; 482 483 /* Try to add it to the LRU. If that fails, decrement. */ 484 if (nfsd_file_lru_add(nf)) { 485 /* If it's still hashed, we're done */ 486 if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 487 nfsd_file_schedule_laundrette(); 488 return; 489 } 490 491 /* 492 * We're racing with unhashing, so try to remove it from 493 * the LRU. If removal fails, then someone else already 494 * has our reference. 495 */ 496 if (!nfsd_file_lru_remove(nf)) 497 return; 498 } 499 } 500 if (refcount_dec_and_test(&nf->nf_ref)) 501 nfsd_file_free(nf); 502 } 503 504 static void 505 nfsd_file_dispose_list(struct list_head *dispose) 506 { 507 struct nfsd_file *nf; 508 509 while (!list_empty(dispose)) { 510 nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 511 list_del_init(&nf->nf_lru); 512 nfsd_file_free(nf); 513 } 514 } 515 516 static void 517 nfsd_file_list_remove_disposal(struct list_head *dst, 518 struct nfsd_fcache_disposal *l) 519 { 520 spin_lock(&l->lock); 521 list_splice_init(&l->freeme, dst); 522 spin_unlock(&l->lock); 523 } 524 525 static void 526 nfsd_file_list_add_disposal(struct list_head *files, struct net *net) 527 { 528 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 529 struct nfsd_fcache_disposal *l = nn->fcache_disposal; 530 531 spin_lock(&l->lock); 532 list_splice_tail_init(files, &l->freeme); 533 spin_unlock(&l->lock); 534 queue_work(nfsd_filecache_wq, &l->work); 535 } 536 537 static void 538 nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, 539 struct net *net) 540 { 541 struct nfsd_file *nf, *tmp; 542 543 list_for_each_entry_safe(nf, tmp, src, nf_lru) { 544 if (nf->nf_net == net) 545 list_move_tail(&nf->nf_lru, dst); 546 } 547 } 548 549 static void 550 nfsd_file_dispose_list_delayed(struct list_head *dispose) 551 { 552 LIST_HEAD(list); 553 struct nfsd_file *nf; 554 555 while(!list_empty(dispose)) { 556 nf = list_first_entry(dispose, struct nfsd_file, nf_lru); 557 nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); 558 nfsd_file_list_add_disposal(&list, nf->nf_net); 559 } 560 } 561 562 /** 563 * nfsd_file_lru_cb - Examine an entry on the LRU list 564 * @item: LRU entry to examine 565 * @lru: controlling LRU 566 * @lock: LRU list lock (unused) 567 * @arg: dispose list 568 * 569 * Return values: 570 * %LRU_REMOVED: @item was removed from the LRU 571 * %LRU_ROTATE: @item is to be moved to the LRU tail 572 * %LRU_SKIP: @item cannot be evicted 573 */ 574 static enum lru_status 575 nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, 576 spinlock_t *lock, void *arg) 577 __releases(lock) 578 __acquires(lock) 579 { 580 struct list_head *head = arg; 581 struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); 582 583 /* We should only be dealing with GC entries here */ 584 WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags)); 585 586 /* 587 * Don't throw out files that are still undergoing I/O or 588 * that have uncleared errors pending. 589 */ 590 if (nfsd_file_check_writeback(nf)) { 591 trace_nfsd_file_gc_writeback(nf); 592 return LRU_SKIP; 593 } 594 595 /* If it was recently added to the list, skip it */ 596 if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { 597 trace_nfsd_file_gc_referenced(nf); 598 return LRU_ROTATE; 599 } 600 601 /* 602 * Put the reference held on behalf of the LRU. If it wasn't the last 603 * one, then just remove it from the LRU and ignore it. 604 */ 605 if (!refcount_dec_and_test(&nf->nf_ref)) { 606 trace_nfsd_file_gc_in_use(nf); 607 list_lru_isolate(lru, &nf->nf_lru); 608 return LRU_REMOVED; 609 } 610 611 /* Refcount went to zero. Unhash it and queue it to the dispose list */ 612 nfsd_file_unhash(nf); 613 list_lru_isolate_move(lru, &nf->nf_lru, head); 614 this_cpu_inc(nfsd_file_evictions); 615 trace_nfsd_file_gc_disposed(nf); 616 return LRU_REMOVED; 617 } 618 619 static void 620 nfsd_file_gc(void) 621 { 622 LIST_HEAD(dispose); 623 unsigned long ret; 624 625 ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, 626 &dispose, list_lru_count(&nfsd_file_lru)); 627 trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); 628 nfsd_file_dispose_list_delayed(&dispose); 629 } 630 631 static void 632 nfsd_file_gc_worker(struct work_struct *work) 633 { 634 nfsd_file_gc(); 635 if (list_lru_count(&nfsd_file_lru)) 636 nfsd_file_schedule_laundrette(); 637 } 638 639 static unsigned long 640 nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) 641 { 642 return list_lru_count(&nfsd_file_lru); 643 } 644 645 static unsigned long 646 nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) 647 { 648 LIST_HEAD(dispose); 649 unsigned long ret; 650 651 ret = list_lru_shrink_walk(&nfsd_file_lru, sc, 652 nfsd_file_lru_cb, &dispose); 653 trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); 654 nfsd_file_dispose_list_delayed(&dispose); 655 return ret; 656 } 657 658 static struct shrinker nfsd_file_shrinker = { 659 .scan_objects = nfsd_file_lru_scan, 660 .count_objects = nfsd_file_lru_count, 661 .seeks = 1, 662 }; 663 664 /** 665 * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file 666 * @nf: nfsd_file to attempt to queue 667 * @dispose: private list to queue successfully-put objects 668 * 669 * Unhash an nfsd_file, try to get a reference to it, and then put that 670 * reference. If it's the last reference, queue it to the dispose list. 671 */ 672 static void 673 nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) 674 __must_hold(RCU) 675 { 676 int decrement = 1; 677 678 /* If we raced with someone else unhashing, ignore it */ 679 if (!nfsd_file_unhash(nf)) 680 return; 681 682 /* If we can't get a reference, ignore it */ 683 if (!nfsd_file_get(nf)) 684 return; 685 686 /* Extra decrement if we remove from the LRU */ 687 if (nfsd_file_lru_remove(nf)) 688 ++decrement; 689 690 /* If refcount goes to 0, then put on the dispose list */ 691 if (refcount_sub_and_test(decrement, &nf->nf_ref)) { 692 list_add(&nf->nf_lru, dispose); 693 trace_nfsd_file_closing(nf); 694 } 695 } 696 697 /** 698 * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode 699 * @inode: inode on which to close out nfsd_files 700 * @dispose: list on which to gather nfsd_files to close out 701 * 702 * An nfsd_file represents a struct file being held open on behalf of nfsd. An 703 * open file however can block other activity (such as leases), or cause 704 * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). 705 * 706 * This function is intended to find open nfsd_files when this sort of 707 * conflicting access occurs and then attempt to close those files out. 708 * 709 * Populates the dispose list with entries that have already had their 710 * refcounts go to zero. The actual free of an nfsd_file can be expensive, 711 * so we leave it up to the caller whether it wants to wait or not. 712 */ 713 static void 714 nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) 715 { 716 struct nfsd_file_lookup_key key = { 717 .type = NFSD_FILE_KEY_INODE, 718 .inode = inode, 719 }; 720 struct nfsd_file *nf; 721 722 rcu_read_lock(); 723 do { 724 nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, 725 nfsd_file_rhash_params); 726 if (!nf) 727 break; 728 nfsd_file_cond_queue(nf, dispose); 729 } while (1); 730 rcu_read_unlock(); 731 } 732 733 /** 734 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file 735 * @inode: inode of the file to attempt to remove 736 * 737 * Close out any open nfsd_files that can be reaped for @inode. The 738 * actual freeing is deferred to the dispose_list_delayed infrastructure. 739 * 740 * This is used by the fsnotify callbacks and setlease notifier. 741 */ 742 static void 743 nfsd_file_close_inode(struct inode *inode) 744 { 745 LIST_HEAD(dispose); 746 747 nfsd_file_queue_for_close(inode, &dispose); 748 nfsd_file_dispose_list_delayed(&dispose); 749 } 750 751 /** 752 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file 753 * @inode: inode of the file to attempt to remove 754 * 755 * Close out any open nfsd_files that can be reaped for @inode. The 756 * nfsd_files are closed out synchronously. 757 * 758 * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames 759 * when reexporting NFS. 760 */ 761 void 762 nfsd_file_close_inode_sync(struct inode *inode) 763 { 764 struct nfsd_file *nf; 765 LIST_HEAD(dispose); 766 767 trace_nfsd_file_close(inode); 768 769 nfsd_file_queue_for_close(inode, &dispose); 770 while (!list_empty(&dispose)) { 771 nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); 772 list_del_init(&nf->nf_lru); 773 nfsd_file_free(nf); 774 } 775 flush_delayed_fput(); 776 } 777 778 /** 779 * nfsd_file_delayed_close - close unused nfsd_files 780 * @work: dummy 781 * 782 * Walk the LRU list and destroy any entries that have not been used since 783 * the last scan. 784 */ 785 static void 786 nfsd_file_delayed_close(struct work_struct *work) 787 { 788 LIST_HEAD(head); 789 struct nfsd_fcache_disposal *l = container_of(work, 790 struct nfsd_fcache_disposal, work); 791 792 nfsd_file_list_remove_disposal(&head, l); 793 nfsd_file_dispose_list(&head); 794 } 795 796 static int 797 nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, 798 void *data) 799 { 800 struct file_lock *fl = data; 801 802 /* Only close files for F_SETLEASE leases */ 803 if (fl->fl_flags & FL_LEASE) 804 nfsd_file_close_inode(file_inode(fl->fl_file)); 805 return 0; 806 } 807 808 static struct notifier_block nfsd_file_lease_notifier = { 809 .notifier_call = nfsd_file_lease_notifier_call, 810 }; 811 812 static int 813 nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, 814 struct inode *inode, struct inode *dir, 815 const struct qstr *name, u32 cookie) 816 { 817 if (WARN_ON_ONCE(!inode)) 818 return 0; 819 820 trace_nfsd_file_fsnotify_handle_event(inode, mask); 821 822 /* Should be no marks on non-regular files */ 823 if (!S_ISREG(inode->i_mode)) { 824 WARN_ON_ONCE(1); 825 return 0; 826 } 827 828 /* don't close files if this was not the last link */ 829 if (mask & FS_ATTRIB) { 830 if (inode->i_nlink) 831 return 0; 832 } 833 834 nfsd_file_close_inode(inode); 835 return 0; 836 } 837 838 839 static const struct fsnotify_ops nfsd_file_fsnotify_ops = { 840 .handle_inode_event = nfsd_file_fsnotify_handle_event, 841 .free_mark = nfsd_file_mark_free, 842 }; 843 844 int 845 nfsd_file_cache_init(void) 846 { 847 int ret; 848 849 lockdep_assert_held(&nfsd_mutex); 850 if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) 851 return 0; 852 853 ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params); 854 if (ret) 855 return ret; 856 857 ret = -ENOMEM; 858 nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); 859 if (!nfsd_filecache_wq) 860 goto out; 861 862 nfsd_file_slab = kmem_cache_create("nfsd_file", 863 sizeof(struct nfsd_file), 0, 0, NULL); 864 if (!nfsd_file_slab) { 865 pr_err("nfsd: unable to create nfsd_file_slab\n"); 866 goto out_err; 867 } 868 869 nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark", 870 sizeof(struct nfsd_file_mark), 0, 0, NULL); 871 if (!nfsd_file_mark_slab) { 872 pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); 873 goto out_err; 874 } 875 876 877 ret = list_lru_init(&nfsd_file_lru); 878 if (ret) { 879 pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); 880 goto out_err; 881 } 882 883 ret = register_shrinker(&nfsd_file_shrinker, "nfsd-filecache"); 884 if (ret) { 885 pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); 886 goto out_lru; 887 } 888 889 ret = lease_register_notifier(&nfsd_file_lease_notifier); 890 if (ret) { 891 pr_err("nfsd: unable to register lease notifier: %d\n", ret); 892 goto out_shrinker; 893 } 894 895 nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, 896 FSNOTIFY_GROUP_NOFS); 897 if (IS_ERR(nfsd_file_fsnotify_group)) { 898 pr_err("nfsd: unable to create fsnotify group: %ld\n", 899 PTR_ERR(nfsd_file_fsnotify_group)); 900 ret = PTR_ERR(nfsd_file_fsnotify_group); 901 nfsd_file_fsnotify_group = NULL; 902 goto out_notifier; 903 } 904 905 INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); 906 out: 907 return ret; 908 out_notifier: 909 lease_unregister_notifier(&nfsd_file_lease_notifier); 910 out_shrinker: 911 unregister_shrinker(&nfsd_file_shrinker); 912 out_lru: 913 list_lru_destroy(&nfsd_file_lru); 914 out_err: 915 kmem_cache_destroy(nfsd_file_slab); 916 nfsd_file_slab = NULL; 917 kmem_cache_destroy(nfsd_file_mark_slab); 918 nfsd_file_mark_slab = NULL; 919 destroy_workqueue(nfsd_filecache_wq); 920 nfsd_filecache_wq = NULL; 921 rhashtable_destroy(&nfsd_file_rhash_tbl); 922 goto out; 923 } 924 925 /** 926 * __nfsd_file_cache_purge: clean out the cache for shutdown 927 * @net: net-namespace to shut down the cache (may be NULL) 928 * 929 * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, 930 * then close out everything. Called when an nfsd instance is being shut down. 931 */ 932 static void 933 __nfsd_file_cache_purge(struct net *net) 934 { 935 struct rhashtable_iter iter; 936 struct nfsd_file *nf; 937 LIST_HEAD(dispose); 938 939 rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter); 940 do { 941 rhashtable_walk_start(&iter); 942 943 nf = rhashtable_walk_next(&iter); 944 while (!IS_ERR_OR_NULL(nf)) { 945 if (!net || nf->nf_net == net) 946 nfsd_file_cond_queue(nf, &dispose); 947 nf = rhashtable_walk_next(&iter); 948 } 949 950 rhashtable_walk_stop(&iter); 951 } while (nf == ERR_PTR(-EAGAIN)); 952 rhashtable_walk_exit(&iter); 953 954 nfsd_file_dispose_list(&dispose); 955 } 956 957 static struct nfsd_fcache_disposal * 958 nfsd_alloc_fcache_disposal(void) 959 { 960 struct nfsd_fcache_disposal *l; 961 962 l = kmalloc(sizeof(*l), GFP_KERNEL); 963 if (!l) 964 return NULL; 965 INIT_WORK(&l->work, nfsd_file_delayed_close); 966 spin_lock_init(&l->lock); 967 INIT_LIST_HEAD(&l->freeme); 968 return l; 969 } 970 971 static void 972 nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) 973 { 974 cancel_work_sync(&l->work); 975 nfsd_file_dispose_list(&l->freeme); 976 kfree(l); 977 } 978 979 static void 980 nfsd_free_fcache_disposal_net(struct net *net) 981 { 982 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 983 struct nfsd_fcache_disposal *l = nn->fcache_disposal; 984 985 nfsd_free_fcache_disposal(l); 986 } 987 988 int 989 nfsd_file_cache_start_net(struct net *net) 990 { 991 struct nfsd_net *nn = net_generic(net, nfsd_net_id); 992 993 nn->fcache_disposal = nfsd_alloc_fcache_disposal(); 994 return nn->fcache_disposal ? 0 : -ENOMEM; 995 } 996 997 /** 998 * nfsd_file_cache_purge - Remove all cache items associated with @net 999 * @net: target net namespace 1000 * 1001 */ 1002 void 1003 nfsd_file_cache_purge(struct net *net) 1004 { 1005 lockdep_assert_held(&nfsd_mutex); 1006 if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) 1007 __nfsd_file_cache_purge(net); 1008 } 1009 1010 void 1011 nfsd_file_cache_shutdown_net(struct net *net) 1012 { 1013 nfsd_file_cache_purge(net); 1014 nfsd_free_fcache_disposal_net(net); 1015 } 1016 1017 void 1018 nfsd_file_cache_shutdown(void) 1019 { 1020 int i; 1021 1022 lockdep_assert_held(&nfsd_mutex); 1023 if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) 1024 return; 1025 1026 lease_unregister_notifier(&nfsd_file_lease_notifier); 1027 unregister_shrinker(&nfsd_file_shrinker); 1028 /* 1029 * make sure all callers of nfsd_file_lru_cb are done before 1030 * calling nfsd_file_cache_purge 1031 */ 1032 cancel_delayed_work_sync(&nfsd_filecache_laundrette); 1033 __nfsd_file_cache_purge(NULL); 1034 list_lru_destroy(&nfsd_file_lru); 1035 rcu_barrier(); 1036 fsnotify_put_group(nfsd_file_fsnotify_group); 1037 nfsd_file_fsnotify_group = NULL; 1038 kmem_cache_destroy(nfsd_file_slab); 1039 nfsd_file_slab = NULL; 1040 fsnotify_wait_marks_destroyed(); 1041 kmem_cache_destroy(nfsd_file_mark_slab); 1042 nfsd_file_mark_slab = NULL; 1043 destroy_workqueue(nfsd_filecache_wq); 1044 nfsd_filecache_wq = NULL; 1045 rhashtable_destroy(&nfsd_file_rhash_tbl); 1046 1047 for_each_possible_cpu(i) { 1048 per_cpu(nfsd_file_cache_hits, i) = 0; 1049 per_cpu(nfsd_file_acquisitions, i) = 0; 1050 per_cpu(nfsd_file_releases, i) = 0; 1051 per_cpu(nfsd_file_total_age, i) = 0; 1052 per_cpu(nfsd_file_evictions, i) = 0; 1053 } 1054 } 1055 1056 /** 1057 * nfsd_file_is_cached - are there any cached open files for this inode? 1058 * @inode: inode to check 1059 * 1060 * The lookup matches inodes in all net namespaces and is atomic wrt 1061 * nfsd_file_acquire(). 1062 * 1063 * Return values: 1064 * %true: filecache contains at least one file matching this inode 1065 * %false: filecache contains no files matching this inode 1066 */ 1067 bool 1068 nfsd_file_is_cached(struct inode *inode) 1069 { 1070 struct nfsd_file_lookup_key key = { 1071 .type = NFSD_FILE_KEY_INODE, 1072 .inode = inode, 1073 }; 1074 bool ret = false; 1075 1076 if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, 1077 nfsd_file_rhash_params) != NULL) 1078 ret = true; 1079 trace_nfsd_file_is_cached(inode, (int)ret); 1080 return ret; 1081 } 1082 1083 static __be32 1084 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 1085 unsigned int may_flags, struct file *file, 1086 struct nfsd_file **pnf, bool want_gc) 1087 { 1088 struct nfsd_file_lookup_key key = { 1089 .type = NFSD_FILE_KEY_FULL, 1090 .need = may_flags & NFSD_FILE_MAY_MASK, 1091 .net = SVC_NET(rqstp), 1092 .gc = want_gc, 1093 }; 1094 bool open_retry = true; 1095 struct nfsd_file *nf; 1096 __be32 status; 1097 int ret; 1098 1099 status = fh_verify(rqstp, fhp, S_IFREG, 1100 may_flags|NFSD_MAY_OWNER_OVERRIDE); 1101 if (status != nfs_ok) 1102 return status; 1103 key.inode = d_inode(fhp->fh_dentry); 1104 key.cred = get_current_cred(); 1105 1106 retry: 1107 rcu_read_lock(); 1108 nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, 1109 nfsd_file_rhash_params); 1110 if (nf) 1111 nf = nfsd_file_get(nf); 1112 rcu_read_unlock(); 1113 1114 if (nf) { 1115 if (nfsd_file_lru_remove(nf)) 1116 WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); 1117 goto wait_for_construction; 1118 } 1119 1120 nf = nfsd_file_alloc(&key, may_flags); 1121 if (!nf) { 1122 status = nfserr_jukebox; 1123 goto out_status; 1124 } 1125 1126 ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl, 1127 &key, &nf->nf_rhash, 1128 nfsd_file_rhash_params); 1129 if (likely(ret == 0)) 1130 goto open_file; 1131 1132 nfsd_file_slab_free(&nf->nf_rcu); 1133 nf = NULL; 1134 if (ret == -EEXIST) 1135 goto retry; 1136 trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret); 1137 status = nfserr_jukebox; 1138 goto out_status; 1139 1140 wait_for_construction: 1141 wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); 1142 1143 /* Did construction of this file fail? */ 1144 if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { 1145 trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); 1146 if (!open_retry) { 1147 status = nfserr_jukebox; 1148 goto out; 1149 } 1150 open_retry = false; 1151 if (refcount_dec_and_test(&nf->nf_ref)) 1152 nfsd_file_free(nf); 1153 goto retry; 1154 } 1155 1156 this_cpu_inc(nfsd_file_cache_hits); 1157 1158 status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); 1159 out: 1160 if (status == nfs_ok) { 1161 this_cpu_inc(nfsd_file_acquisitions); 1162 *pnf = nf; 1163 } else { 1164 if (refcount_dec_and_test(&nf->nf_ref)) 1165 nfsd_file_free(nf); 1166 nf = NULL; 1167 } 1168 1169 out_status: 1170 put_cred(key.cred); 1171 trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); 1172 return status; 1173 1174 open_file: 1175 trace_nfsd_file_alloc(nf); 1176 nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); 1177 if (nf->nf_mark) { 1178 if (file) { 1179 get_file(file); 1180 nf->nf_file = file; 1181 status = nfs_ok; 1182 trace_nfsd_file_opened(nf, status); 1183 } else { 1184 status = nfsd_open_verified(rqstp, fhp, may_flags, 1185 &nf->nf_file); 1186 trace_nfsd_file_open(nf, status); 1187 } 1188 } else 1189 status = nfserr_jukebox; 1190 /* 1191 * If construction failed, or we raced with a call to unlink() 1192 * then unhash. 1193 */ 1194 if (status == nfs_ok && key.inode->i_nlink == 0) 1195 status = nfserr_jukebox; 1196 if (status != nfs_ok) 1197 nfsd_file_unhash(nf); 1198 clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); 1199 smp_mb__after_atomic(); 1200 wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); 1201 goto out; 1202 } 1203 1204 /** 1205 * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file 1206 * @rqstp: the RPC transaction being executed 1207 * @fhp: the NFS filehandle of the file to be opened 1208 * @may_flags: NFSD_MAY_ settings for the file 1209 * @pnf: OUT: new or found "struct nfsd_file" object 1210 * 1211 * The nfsd_file object returned by this API is reference-counted 1212 * and garbage-collected. The object is retained for a few 1213 * seconds after the final nfsd_file_put() in case the caller 1214 * wants to re-use it. 1215 * 1216 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in 1217 * network byte order is returned. 1218 */ 1219 __be32 1220 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, 1221 unsigned int may_flags, struct nfsd_file **pnf) 1222 { 1223 return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); 1224 } 1225 1226 /** 1227 * nfsd_file_acquire - Get a struct nfsd_file with an open file 1228 * @rqstp: the RPC transaction being executed 1229 * @fhp: the NFS filehandle of the file to be opened 1230 * @may_flags: NFSD_MAY_ settings for the file 1231 * @pnf: OUT: new or found "struct nfsd_file" object 1232 * 1233 * The nfsd_file_object returned by this API is reference-counted 1234 * but not garbage-collected. The object is unhashed after the 1235 * final nfsd_file_put(). 1236 * 1237 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in 1238 * network byte order is returned. 1239 */ 1240 __be32 1241 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, 1242 unsigned int may_flags, struct nfsd_file **pnf) 1243 { 1244 return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); 1245 } 1246 1247 /** 1248 * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file 1249 * @rqstp: the RPC transaction being executed 1250 * @fhp: the NFS filehandle of the file just created 1251 * @may_flags: NFSD_MAY_ settings for the file 1252 * @file: cached, already-open file (may be NULL) 1253 * @pnf: OUT: new or found "struct nfsd_file" object 1254 * 1255 * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, 1256 * and @file is non-NULL, use it to instantiate a new nfsd_file instead of 1257 * opening a new one. 1258 * 1259 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in 1260 * network byte order is returned. 1261 */ 1262 __be32 1263 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, 1264 unsigned int may_flags, struct file *file, 1265 struct nfsd_file **pnf) 1266 { 1267 return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); 1268 } 1269 1270 /* 1271 * Note that fields may be added, removed or reordered in the future. Programs 1272 * scraping this file for info should test the labels to ensure they're 1273 * getting the correct field. 1274 */ 1275 int nfsd_file_cache_stats_show(struct seq_file *m, void *v) 1276 { 1277 unsigned long releases = 0, evictions = 0; 1278 unsigned long hits = 0, acquisitions = 0; 1279 unsigned int i, count = 0, buckets = 0; 1280 unsigned long lru = 0, total_age = 0; 1281 1282 /* Serialize with server shutdown */ 1283 mutex_lock(&nfsd_mutex); 1284 if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) { 1285 struct bucket_table *tbl; 1286 struct rhashtable *ht; 1287 1288 lru = list_lru_count(&nfsd_file_lru); 1289 1290 rcu_read_lock(); 1291 ht = &nfsd_file_rhash_tbl; 1292 count = atomic_read(&ht->nelems); 1293 tbl = rht_dereference_rcu(ht->tbl, ht); 1294 buckets = tbl->size; 1295 rcu_read_unlock(); 1296 } 1297 mutex_unlock(&nfsd_mutex); 1298 1299 for_each_possible_cpu(i) { 1300 hits += per_cpu(nfsd_file_cache_hits, i); 1301 acquisitions += per_cpu(nfsd_file_acquisitions, i); 1302 releases += per_cpu(nfsd_file_releases, i); 1303 total_age += per_cpu(nfsd_file_total_age, i); 1304 evictions += per_cpu(nfsd_file_evictions, i); 1305 } 1306 1307 seq_printf(m, "total entries: %u\n", count); 1308 seq_printf(m, "hash buckets: %u\n", buckets); 1309 seq_printf(m, "lru entries: %lu\n", lru); 1310 seq_printf(m, "cache hits: %lu\n", hits); 1311 seq_printf(m, "acquisitions: %lu\n", acquisitions); 1312 seq_printf(m, "releases: %lu\n", releases); 1313 seq_printf(m, "evictions: %lu\n", evictions); 1314 if (releases) 1315 seq_printf(m, "mean age (ms): %ld\n", total_age / releases); 1316 else 1317 seq_printf(m, "mean age (ms): -\n"); 1318 return 0; 1319 } 1320