/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS	12
#define NFSD_FILE_HASH_SIZE	(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY	(2 * HZ)

#define NFSD_FILE_SHUTDOWN	(1)
#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
#define NFSD_FILE_LRU_LIMIT	(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

struct nfsd_fcache_disposal {
	struct work_struct work;
	spinlock_t lock;
	struct list_head freeme;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;

static void nfsd_file_gc(void);

static void
nfsd_file_schedule_laundrette(void)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			   NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}
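/*
 * Find-or-create pattern for the per-inode fsnotify mark: look up an
 * existing mark and try to take a reference; if there is none (or the
 * one we found is already being torn down), allocate a new mark and
 * attach it. If a concurrent attach wins the race,
 * fsnotify_add_inode_mark() returns -EEXIST and we retry the lookup.
 */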
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int err;
	struct fsnotify_mark *mark;
	struct nfsd_file_mark *nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		fsnotify_group_lock(nfsd_file_fsnotify_group);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
					  nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else {
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
		}

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static void
nfsd_file_flush(struct nfsd_file *nf)
{
	if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
}

static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}
static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (refcount_dec_not_one(&nf->nf_ref))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
}

void
nfsd_file_put(struct nfsd_file *nf)
{
	might_sleep();

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	} else if (nf->nf_file) {
		nfsd_file_put_noref(nf);
		nfsd_file_schedule_laundrette();
	} else
		nfsd_file_put_noref(nf);

	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
		nfsd_file_gc();
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_flush(nf);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
		struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	spin_lock(&l->lock);
	list_splice_tail_init(files, &l->freeme);
	spin_unlock(&l->lock);
	queue_work(nfsd_filecache_wq, &l->work);
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
		struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}
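/*
 * Entries queued for delayed disposal are handed off per net namespace:
 * the loop below repeatedly takes the net of the first remaining entry,
 * peels off every entry belonging to that net with
 * nfsd_file_list_add_pernet(), and splices them onto that namespace's
 * ->freeme list. The actual flush and fput then happen from workqueue
 * context in nfsd_file_delayed_close(), so LRU walkers and the shrinker
 * never block on I/O themselves.
 */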
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_skip;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_skip:
	return LRU_SKIP;
}

static unsigned long
nfsd_file_lru_walk_list(struct shrink_control *sc)
{
	LIST_HEAD(head);
	struct nfsd_file *nf;
	unsigned long ret;

	if (sc)
		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
				nfsd_file_lru_cb, &head);
	else
		ret = list_lru_walk(&nfsd_file_lru,
				nfsd_file_lru_cb,
				&head, LONG_MAX);
	list_for_each_entry(nf, &head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list_delayed(&head);
	return ret;
}

static void
nfsd_file_gc(void)
{
	nfsd_file_lru_walk_list(NULL);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	return nfsd_file_lru_walk_list(sc);
}

static struct shrinker	nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file	*nf;
	struct hlist_node	*tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}
/**
 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Walk the LRU list and close any entries that have not been used since
 * the last scan.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			      void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	if (WARN_ON_ONCE(!inode))
		return 0;

	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_inode_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};
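/*
 * Lifecycle sketch (illustrative only; the real call sites live in the
 * nfsd startup/shutdown paths and may differ between kernel versions):
 *
 *	ret = nfsd_file_cache_init();		// once, at first nfsd start
 *	ret = nfsd_file_cache_start_net(net);	// per net namespace
 *	...
 *	nfsd_file_cache_shutdown_net(net);	// per net namespace
 *	nfsd_file_cache_shutdown();		// once, at final shutdown
 */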
int
nfsd_file_cache_init(void)
{
	int		ret = -ENOMEM;
	unsigned int	i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
	if (!nfsd_filecache_wq)
		goto out;

	nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
				sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
							FSNOTIFY_GROUP_NOFS);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		ret = PTR_ERR(nfsd_file_fsnotify_group);
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	goto out;
}
/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int		i;
	struct nfsd_file	*nf;
	struct hlist_node	*next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}

static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(void)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	INIT_WORK(&l->work, nfsd_file_delayed_close);
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	cancel_work_sync(&l->work);
	nfsd_file_dispose_list(&l->freeme);
	kfree(l);
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	nfsd_free_fcache_disposal(l);
}

int
nfsd_file_cache_start_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	nn->fcache_disposal = nfsd_alloc_fcache_disposal();
	return nn->fcache_disposal ? 0 : -ENOMEM;
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
}

static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}
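/*
 * Cache lookups are keyed on more than the inode: a match also requires
 * the same access mask (NFSD_MAY_READ/WRITE), the same net namespace,
 * and credentials that compare equal under nfsd_match_cred() above. The
 * same file accessed on behalf of two different users therefore yields
 * two distinct nfsd_file entries.
 */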
static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
		      unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
			nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
		if (nf->nf_may != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this fh?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this fh. Returns true if there
 * are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool			ret = false;
	struct nfsd_file	*nf;
	unsigned int		hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}
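/*
 * The acquire path below works in two phases: an RCU-protected lookup,
 * then (on a miss) allocation of a candidate entry and a re-check under
 * the bucket lock. Whichever thread inserts the entry does so with
 * NFSD_FILE_PENDING set and performs the actual open outside the lock;
 * racing callers that find the pending entry sleep in wait_on_bit()
 * until construction completes (or fails, in which case they retry once).
 */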
static __be32
nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		     unsigned int may_flags, struct nfsd_file **pnf, bool open)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	refcount_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
		nfsd_file_gc();

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark) {
		if (open) {
			status = nfsd_open_verified(rqstp, fhp, may_flags,
						    &nf->nf_file);
			trace_nfsd_file_open(nf, status);
		} else
			status = nfs_ok;
	} else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;

		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/**
 * nfsd_file_acquire - Get a struct nfsd_file with an open file
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file to be opened
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true);
}

/**
 * nfsd_file_create - Get a struct nfsd_file, do not open
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file just created
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
		 unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false);
}
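/*
 * Illustrative caller (a sketch, not code from this file): an NFSD
 * operation that reads from a file might use the cache like so:
 *
 *	struct nfsd_file *nf;
 *	__be32 status;
 *
 *	status = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
 *	if (status != nfs_ok)
 *		return status;
 *	// ... do I/O through nf->nf_file ...
 *	nfsd_file_put(nf);
 *
 * Repeat acquires for the same inode/cred/access mask hit the cache and
 * skip the expensive open, which is the point of this cache.
 */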
/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown.
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits: %lu\n", hits);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}