/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>
#include <linux/rhashtable.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSD_LAUNDRETTE_DELAY		(2 * HZ)

#define NFSD_FILE_CACHE_UP		(0)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
static DEFINE_PER_CPU(unsigned long, nfsd_file_pages_flushed);
static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);

struct nfsd_fcache_disposal {
	struct work_struct work;
	spinlock_t lock;
	struct list_head freeme;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct list_lru			nfsd_file_lru;
static unsigned long			nfsd_file_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static struct delayed_work		nfsd_filecache_laundrette;
static struct rhashtable		nfsd_file_rhash_tbl
						____cacheline_aligned_in_smp;

enum nfsd_file_lookup_type {
	NFSD_FILE_KEY_INODE,
	NFSD_FILE_KEY_FULL,
};

struct nfsd_file_lookup_key {
	struct inode			*inode;
	struct net			*net;
	const struct cred		*cred;
	unsigned char			need;
	enum nfsd_file_lookup_type	type;
};

/*
 * The returned hash value is based solely on the address of an in-kernel
 * inode, a pointer to a slab-allocated object. The entropy in such a
 * pointer is concentrated in its middle bits.
 */
static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed)
{
	unsigned long ptr = (unsigned long)inode;
	u32 k;

	k = ptr >> L1_CACHE_SHIFT;
	k &= 0x00ffffff;
	return jhash2(&k, 1, seed);
}
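/*
 * Example (illustrative only): inodes are slab objects, so the low
 * bits of an inode address are alignment padding and the high bits
 * are shared by every object in the same region.  Dropping the low
 * L1_CACHE_SHIFT bits and masking down to 24 bits keeps just the
 * "interesting" middle bits, which jhash2() then spreads across the
 * full 32-bit hash space:
 *
 *	k = ((unsigned long)inode >> L1_CACHE_SHIFT) & 0x00ffffff;
 *	hash = jhash2(&k, 1, seed);
 */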
/**
 * nfsd_file_key_hashfn - Compute the hash value of a lookup key
 * @data: key on which to compute the hash value
 * @len: rhash table's key_len parameter (unused)
 * @seed: rhash table's random seed of the day
 *
 * Return value:
 *   Computed 32-bit hash value
 */
static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed)
{
	const struct nfsd_file_lookup_key *key = data;

	return nfsd_file_inode_hash(key->inode, seed);
}

/**
 * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file
 * @data: object on which to compute the hash value
 * @len: rhash table's key_len parameter (unused)
 * @seed: rhash table's random seed of the day
 *
 * Return value:
 *   Computed 32-bit hash value
 */
static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const struct nfsd_file *nf = data;

	return nfsd_file_inode_hash(nf->nf_inode, seed);
}

static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}

/**
 * nfsd_file_obj_cmpfn - Match a cache item against search criteria
 * @arg: search criteria
 * @ptr: cache item to check
 *
 * Return values:
 *   %0 - Item matches search criteria
 *   %1 - Item does not match search criteria
 */
static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
			       const void *ptr)
{
	const struct nfsd_file_lookup_key *key = arg->key;
	const struct nfsd_file *nf = ptr;

	switch (key->type) {
	case NFSD_FILE_KEY_INODE:
		if (nf->nf_inode != key->inode)
			return 1;
		break;
	case NFSD_FILE_KEY_FULL:
		if (nf->nf_inode != key->inode)
			return 1;
		if (nf->nf_may != key->need)
			return 1;
		if (nf->nf_net != key->net)
			return 1;
		if (!nfsd_match_cred(nf->nf_cred, key->cred))
			return 1;
		if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
			return 1;
		break;
	}
	return 0;
}

static const struct rhashtable_params nfsd_file_rhash_params = {
	.key_len		= sizeof_field(struct nfsd_file, nf_inode),
	.key_offset		= offsetof(struct nfsd_file, nf_inode),
	.head_offset		= offsetof(struct nfsd_file, nf_rhash),
	.hashfn			= nfsd_file_key_hashfn,
	.obj_hashfn		= nfsd_file_obj_hashfn,
	.obj_cmpfn		= nfsd_file_obj_cmpfn,
	/* Reduce resizing churn on light workloads */
	.min_size		= 512,		/* buckets */
	.automatic_shrinking	= true,
};
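/*
 * How the pieces above fit together (illustration only): both hash
 * functions hash just the inode pointer, so every nfsd_file for an
 * inode lands in the same bucket, and key->type tells obj_cmpfn how
 * much of the key must match.  A full-match lookup, as done in
 * nfsd_file_do_acquire() below, passes a key like:
 *
 *	struct nfsd_file_lookup_key key = {
 *		.type	= NFSD_FILE_KEY_FULL,
 *		.inode	= inode,
 *		.net	= net,
 *		.cred	= cred,
 *		.need	= may_flags & NFSD_FILE_MAY_MASK,
 *	};
 *	nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
 *			       nfsd_file_rhash_params);
 */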
static void
nfsd_file_schedule_laundrette(void)
{
	if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) ||
	    test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			   NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}

static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;

	do {
		fsnotify_group_lock(nfsd_file_fsnotify_group);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
					  nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else {
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
		}

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}
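/*
 * Note on the retry loop above: -EEXIST from fsnotify_add_inode_mark()
 * means another task attached a mark to this inode after our
 * fsnotify_find_mark() came up empty, so we go around again and pick
 * up the winner's mark instead.
 */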
static struct nfsd_file *
nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_birthtime = ktime_get();
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = key->net;
		nf->nf_flags = 0;
		__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
		__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
		nf->nf_inode = key->inode;
		/* nf_ref is pre-incremented for hash table */
		refcount_set(&nf->nf_ref, 2);
		nf->nf_may = key->need;
		nf->nf_mark = NULL;
	}
	return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
	s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
	bool flush = false;

	this_cpu_inc(nfsd_file_releases);
	this_cpu_add(nfsd_file_total_age, age);

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}

	/*
	 * If this item is still linked via nf_lru, that's a bug.
	 * WARN and leak it to preserve system stability.
	 */
	if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
		return flush;

	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static void
nfsd_file_flush(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return;
	this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages);
	if (vfs_fsync(file, 1) != 0)
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
}

static void nfsd_file_lru_add(struct nfsd_file *nf)
{
	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (list_lru_add(&nfsd_file_lru, &nf->nf_lru))
		trace_nfsd_file_lru_add(nf);
}

static void nfsd_file_lru_remove(struct nfsd_file *nf)
{
	if (list_lru_del(&nfsd_file_lru, &nf->nf_lru))
		trace_nfsd_file_lru_del(nf);
}

static void
nfsd_file_hash_remove(struct nfsd_file *nf)
{
	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
	rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
			       nfsd_file_rhash_params);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_hash_remove(nf);
		return true;
	}
	return false;
}
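/*
 * Rationale for the write-verifier resets in nfsd_file_flush() and
 * nfsd_file_hash_remove() above: if a file is flushed or unhashed
 * while its mapping holds a writeback error no client has seen, that
 * error would otherwise be lost.  Scrambling the server's write
 * verifier forces clients to resend any UNSTABLE writes, so the
 * failure cannot go unnoticed.
 */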
static void
nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose)
{
	trace_nfsd_file_unhash_and_dispose(nf);
	if (nfsd_file_unhash(nf)) {
		/* caller must call nfsd_file_dispose_list() later */
		nfsd_file_lru_remove(nf);
		list_add(&nf->nf_lru, dispose);
	}
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_lru_remove(nf);
		nfsd_file_free(nf);
	}
}

void
nfsd_file_put(struct nfsd_file *nf)
{
	might_sleep();

	nfsd_file_lru_add(nf);
	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	} else if (nf->nf_file) {
		nfsd_file_put_noref(nf);
		nfsd_file_schedule_laundrette();
	} else
		nfsd_file_put_noref(nf);
}

/**
 * nfsd_file_close - Close an nfsd_file
 * @nf: nfsd_file to close
 *
 * If this is the final reference for @nf, free it immediately.
 * This reflects an on-the-wire CLOSE or DELEGRETURN into the
 * VFS and exported filesystem.
 */
void nfsd_file_close(struct nfsd_file *nf)
{
	nfsd_file_put(nf);
	if (refcount_dec_if_one(&nf->nf_ref)) {
		nfsd_file_unhash(nf);
		nfsd_file_lru_remove(nf);
		nfsd_file_free(nf);
	}
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del_init(&nf->nf_lru);
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del_init(&nf->nf_lru);
		nfsd_file_flush(nf);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
			       struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	spin_lock(&l->lock);
	list_splice_tail_init(files, &l->freeme);
	spin_unlock(&l->lock);
	queue_work(nfsd_filecache_wq, &l->work);
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
			  struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}

static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}
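/*
 * Disposal pipeline, for orientation: nfsd_file_dispose_list_delayed()
 * sorts victims by net namespace, splices each batch onto that
 * nfsd_net's ->freeme list, and kicks the namespace's disposal work
 * item, so the actual flush and release happen from workqueue context
 * (see nfsd_file_delayed_close() below) rather than in the caller.
 */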
/**
 * nfsd_file_lru_cb - Examine an entry on the LRU list
 * @item: LRU entry to examine
 * @lru: controlling LRU
 * @lock: LRU list lock (unused)
 * @arg: dispose list
 *
 * Return values:
 *   %LRU_REMOVED: @item was removed from the LRU
 *   %LRU_ROTATE: @item is to be moved to the LRU tail
 *   %LRU_SKIP: @item cannot be evicted
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1) {
		list_lru_isolate(lru, &nf->nf_lru);
		trace_nfsd_file_gc_in_use(nf);
		return LRU_REMOVED;
	}

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf)) {
		trace_nfsd_file_gc_writeback(nf);
		return LRU_SKIP;
	}

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
		trace_nfsd_file_gc_referenced(nf);
		return LRU_ROTATE;
	}

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		trace_nfsd_file_gc_hashed(nf);
		return LRU_SKIP;
	}

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	this_cpu_inc(nfsd_file_evictions);
	trace_nfsd_file_gc_disposed(nf);
	return LRU_REMOVED;
}
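/*
 * Sketch of the lockless ordering relied on above (illustration only):
 *
 *	put path (nfsd_file_put)	scan path (nfsd_file_lru_cb)
 *	------------------------	----------------------------
 *	set_bit(REFERENCED)		refcount_read(nf_ref)
 *	refcount_dec(nf_ref)		test_and_clear_bit(REFERENCED)
 *
 * Even if the scan reads nf_ref == 1 just after a racing put's
 * decrement, that put has already set REFERENCED, so the entry is
 * rotated once instead of evicted: a recently-used file always
 * survives at least one extra scan.
 */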
/*
 * Unhash items on @dispose immediately, then queue them on the
 * disposal workqueue to finish releasing them in the background.
 *
 * cel: Note that between the time list_lru_shrink_walk runs and
 * now, these items are in the hash table but marked unhashed.
 * Why release these outside of lru_cb? There's no lock-ordering
 * problem, since lru_cb currently takes no locks.
 */
static void nfsd_file_gc_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	list_for_each_entry(nf, dispose, nf_lru)
		nfsd_file_hash_remove(nf);
	nfsd_file_dispose_list_delayed(dispose);
}

static void
nfsd_file_gc(void)
{
	LIST_HEAD(dispose);
	unsigned long ret;

	ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
			    &dispose, list_lru_count(&nfsd_file_lru));
	trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
	nfsd_file_gc_dispose_list(&dispose);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	LIST_HEAD(dispose);
	unsigned long ret;

	ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
				   nfsd_file_lru_cb, &dispose);
	trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
	nfsd_file_gc_dispose_list(&dispose);
	return ret;
}

static struct shrinker nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

/*
 * Find all cache items across all net namespaces that match @inode and
 * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire().
 */
static unsigned int
__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
{
	struct nfsd_file_lookup_key key = {
		.type	= NFSD_FILE_KEY_INODE,
		.inode	= inode,
	};
	unsigned int count = 0;
	struct nfsd_file *nf;

	rcu_read_lock();
	do {
		nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
				       nfsd_file_rhash_params);
		if (!nf)
			break;
		nfsd_file_unhash_and_dispose(nf, dispose);
		count++;
	} while (1);
	rcu_read_unlock();
	return count;
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Unhash and put, then flush and fput all cache items associated with @inode.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	LIST_HEAD(dispose);
	unsigned int count;

	count = __nfsd_file_close_inode(inode, &dispose);
	trace_nfsd_file_close_inode_sync(inode, count);
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Unhash and put all cache items associated with @inode.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	LIST_HEAD(dispose);
	unsigned int count;

	count = __nfsd_file_close_inode(inode, &dispose);
	trace_nfsd_file_close_inode(inode, count);
	nfsd_file_dispose_list_delayed(&dispose);
}
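/*
 * The two variants above differ only in how the dispose list drains:
 * nfsd_file_close_inode_sync() flushes and frees in the caller's
 * context, while nfsd_file_close_inode() defers to the per-net
 * disposal queue, which suits callers such as the fsnotify event
 * handler below that should not perform a synchronous flush.
 */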
/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Scheduled by the per-net disposal infrastructure; drains this net's
 * queue of nfsd_files that are waiting to be flushed and closed.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			      void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	if (WARN_ON_ONCE(!inode))
		return 0;

	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_inode_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};

int
nfsd_file_cache_init(void)
{
	int ret;

	lockdep_assert_held(&nfsd_mutex);
	if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
		return 0;

	ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params);
	if (ret)
		return ret;

	ret = -ENOMEM;
	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
	if (!nfsd_filecache_wq)
		goto out;

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
					sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker, "nfsd-filecache");
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
							FSNOTIFY_GROUP_NOFS);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		ret = PTR_ERR(nfsd_file_fsnotify_group);
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	rhashtable_destroy(&nfsd_file_rhash_tbl);
	goto out;
}
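/*
 * Note the unwind order in nfsd_file_cache_init(): each error label
 * releases, in reverse, everything set up before the failing step,
 * and kmem_cache_destroy() ignores a NULL pointer, so the shared
 * out_err tail safely covers a failure of either slab cache.
 */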
static void
__nfsd_file_cache_purge(struct net *net)
{
	struct rhashtable_iter iter;
	struct nfsd_file *nf;
	LIST_HEAD(dispose);

	rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
	do {
		rhashtable_walk_start(&iter);

		nf = rhashtable_walk_next(&iter);
		while (!IS_ERR_OR_NULL(nf)) {
			/*
			 * Filter in place; a "continue" here would skip
			 * the rhashtable_walk_next() and loop forever.
			 */
			if (!net || nf->nf_net == net)
				nfsd_file_unhash_and_dispose(nf, &dispose);
			nf = rhashtable_walk_next(&iter);
		}

		rhashtable_walk_stop(&iter);
	} while (nf == ERR_PTR(-EAGAIN));
	rhashtable_walk_exit(&iter);

	nfsd_file_dispose_list(&dispose);
}

static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(void)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	INIT_WORK(&l->work, nfsd_file_delayed_close);
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	cancel_work_sync(&l->work);
	nfsd_file_dispose_list(&l->freeme);
	kfree(l);
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	nfsd_free_fcache_disposal(l);
}

int
nfsd_file_cache_start_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	nn->fcache_disposal = nfsd_alloc_fcache_disposal();
	return nn->fcache_disposal ? 0 : -ENOMEM;
}
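/*
 * Per-net lifecycle, for orientation: nfsd_file_cache_start_net()
 * allocates a namespace's disposal queue when nfsd starts there, and
 * nfsd_file_cache_shutdown_net() below purges that namespace's cache
 * entries before freeing the queue, so no queued disposal work can
 * outlive its nfsd_net.
 */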
/**
 * nfsd_file_cache_purge - Remove all cache items associated with @net
 * @net: target net namespace
 */
void
nfsd_file_cache_purge(struct net *net)
{
	lockdep_assert_held(&nfsd_mutex);
	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
		__nfsd_file_cache_purge(net);
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
	int i;

	lockdep_assert_held(&nfsd_mutex);
	if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
		return;

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	__nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	rhashtable_destroy(&nfsd_file_rhash_tbl);

	for_each_possible_cpu(i) {
		per_cpu(nfsd_file_cache_hits, i) = 0;
		per_cpu(nfsd_file_acquisitions, i) = 0;
		per_cpu(nfsd_file_releases, i) = 0;
		per_cpu(nfsd_file_total_age, i) = 0;
		per_cpu(nfsd_file_pages_flushed, i) = 0;
		per_cpu(nfsd_file_evictions, i) = 0;
	}
}
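/*
 * The rcu_barrier() in nfsd_file_cache_shutdown() above matters:
 * nfsd_files are freed via call_rcu(), so the nfsd_file slab must not
 * be destroyed until every pending nfsd_file_slab_free() callback has
 * run.
 */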
/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode to check
 *
 * The lookup matches inodes in all net namespaces and is atomic wrt
 * nfsd_file_acquire().
 *
 * Return values:
 *   %true: filecache contains at least one file matching this inode
 *   %false: filecache contains no files matching this inode
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	struct nfsd_file_lookup_key key = {
		.type	= NFSD_FILE_KEY_INODE,
		.inode	= inode,
	};
	bool ret = false;

	if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
				   nfsd_file_rhash_params) != NULL)
		ret = true;
	trace_nfsd_file_is_cached(inode, (int)ret);
	return ret;
}
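/*
 * Overview of nfsd_file_do_acquire() below, for orientation:
 *
 * 1. Look up a fully-matching entry under RCU; on a hit, wait for any
 *    in-progress construction (NFSD_FILE_PENDING) to complete.
 * 2. On a miss, allocate a new nfsd_file and try to insert it; an
 *    -EEXIST from the rhashtable means another task won the race, so
 *    retry the lookup.
 * 3. The task that inserted the entry opens the struct file, then
 *    clears NFSD_FILE_PENDING and wakes any waiters.
 */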
static __be32
nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		     unsigned int may_flags, struct nfsd_file **pnf, bool open)
{
	struct nfsd_file_lookup_key key = {
		.type	= NFSD_FILE_KEY_FULL,
		.need	= may_flags & NFSD_FILE_MAY_MASK,
		.net	= SVC_NET(rqstp),
	};
	bool open_retry = true;
	struct nfsd_file *nf;
	__be32 status;
	int ret;

	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;
	key.inode = d_inode(fhp->fh_dentry);
	key.cred = get_current_cred();

retry:
	rcu_read_lock();
	nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
			       nfsd_file_rhash_params);
	if (nf)
		nf = nfsd_file_get(nf);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	nf = nfsd_file_alloc(&key, may_flags);
	if (!nf) {
		status = nfserr_jukebox;
		goto out_status;
	}

	ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
					   &key, &nf->nf_rhash,
					   nfsd_file_rhash_params);
	if (likely(ret == 0))
		goto open_file;

	nfsd_file_slab_free(&nf->nf_rcu);
	if (ret == -EEXIST)
		goto retry;
	trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
	status = nfserr_jukebox;
	goto out_status;

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
		if (!open_retry) {
			status = nfserr_jukebox;
			goto out;
		}
		open_retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	nfsd_file_lru_remove(nf);
	this_cpu_inc(nfsd_file_cache_hits);

	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
	if (status == nfs_ok) {
		if (open)
			this_cpu_inc(nfsd_file_acquisitions);
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

out_status:
	put_cred(key.cred);
	if (open)
		trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
	return status;

open_file:
	trace_nfsd_file_alloc(nf);
	nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
	if (nf->nf_mark) {
		if (open) {
			status = nfsd_open_verified(rqstp, fhp, may_flags,
						    &nf->nf_file);
			trace_nfsd_file_open(nf, status);
		} else
			status = nfs_ok;
	} else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || key.inode->i_nlink == 0)
		if (nfsd_file_unhash(nf))
			nfsd_file_put_noref(nf);
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/**
 * nfsd_file_acquire - Get a struct nfsd_file with an open file
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file to be opened
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true);
}

/**
 * nfsd_file_create - Get a struct nfsd_file, do not open
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file just created
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
		 unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false);
}

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned long releases = 0, pages_flushed = 0, evictions = 0;
	unsigned long hits = 0, acquisitions = 0;
	unsigned int i, count = 0, buckets = 0;
	unsigned long lru = 0, total_age = 0;

	/* Serialize with server shutdown */
	mutex_lock(&nfsd_mutex);
	if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) {
		struct bucket_table *tbl;
		struct rhashtable *ht;

		lru = list_lru_count(&nfsd_file_lru);

		rcu_read_lock();
		ht = &nfsd_file_rhash_tbl;
		count = atomic_read(&ht->nelems);
		tbl = rht_dereference_rcu(ht->tbl, ht);
		buckets = tbl->size;
		rcu_read_unlock();
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i) {
		hits += per_cpu(nfsd_file_cache_hits, i);
		acquisitions += per_cpu(nfsd_file_acquisitions, i);
		releases += per_cpu(nfsd_file_releases, i);
		total_age += per_cpu(nfsd_file_total_age, i);
		evictions += per_cpu(nfsd_file_evictions, i);
		pages_flushed += per_cpu(nfsd_file_pages_flushed, i);
	}

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "hash buckets: %u\n", buckets);
	seq_printf(m, "lru entries: %lu\n", lru);
	seq_printf(m, "cache hits: %lu\n", hits);
	seq_printf(m, "acquisitions: %lu\n", acquisitions);
	seq_printf(m, "releases: %lu\n", releases);
	seq_printf(m, "evictions: %lu\n", evictions);
	if (releases)
		seq_printf(m, "mean age (ms): %ld\n", total_age / releases);
	else
		seq_printf(m, "mean age (ms): -\n");
	seq_printf(m, "pages flushed: %lu\n", pages_flushed);
	return 0;
}
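/*
 * Example output (values are illustrative only):
 *
 *	total entries: 128
 *	hash buckets: 512
 *	lru entries: 94
 *	cache hits: 5017
 *	acquisitions: 5145
 *	releases: 17
 *	evictions: 9
 *	mean age (ms): 3041
 *	pages flushed: 262
 */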