/*
 * pNFS functions to call and manage layout drivers.
 *
 * Copyright (c) 2002 [year of first publication]
 * The Regents of the University of Michigan
 * All Rights Reserved
 *
 * Dean Hildebrand <dhildebz@umich.edu>
 *
 * Permission is granted to use, copy, create derivative works, and
 * redistribute this software and such derivative works for any purpose,
 * so long as the name of the University of Michigan is not used in
 * any advertising or publicity pertaining to the use or distribution
 * of this software without specific, written prior authorization. If
 * the above copyright notice or any other identification of the
 * University of Michigan is included in any copy of any portion of
 * this software, then the disclaimer below must also be included.
 *
 * This software is provided as is, without representation or warranty
 * of any kind either express or implied, including without limitation
 * the implied warranties of merchantability, fitness for a particular
 * purpose, or noninfringement. The Regents of the University of
 * Michigan shall not be liable for any damages, including special,
 * indirect, incidental, or consequential damages, with respect to any
 * claim arising out of or in connection with the use of the software,
 * even if it has been or is hereafter advised of the possibility of
 * such damages.
 */

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
#include "nfs4trace.h"
#include "delegation.h"
#include "nfs42.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS
#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)

/* Locking:
 *
 * pnfs_spinlock:
 *      protects pnfs_modules_tbl.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl holds all pnfs modules
 */
static LIST_HEAD(pnfs_modules_tbl);

static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);

/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
find_pnfs_driver_locked(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
		if (local->id == id)
			goto out;
	local = NULL;
out:
	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
	return local;
}

static struct pnfs_layoutdriver_type *
find_pnfs_driver(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	spin_lock(&pnfs_spinlock);
	local = find_pnfs_driver_locked(id);
	if (local != NULL && !try_module_get(local->owner)) {
		dprintk("%s: Could not grab reference on module\n", __func__);
		local = NULL;
	}
	spin_unlock(&pnfs_spinlock);
	return local;
}

void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
	if (nfss->pnfs_curr_ld) {
		if (nfss->pnfs_curr_ld->clear_layoutdriver)
			nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
		/* Decrement the MDS count. Purge the deviceid cache if zero */
		if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
			nfs4_deviceid_purge_client(nfss->nfs_client);
		module_put(nfss->pnfs_curr_ld->owner);
	}
	nfss->pnfs_curr_ld = NULL;
}

/*
 * Try to set the server's pnfs module to the pnfs layout type specified by id.
 * Currently only one pNFS layout driver per filesystem is supported.
 *
 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
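 *
 * If no matching layout driver is registered yet, request_module() is
 * used below to try to autoload one before falling back to plain NFSv4
 * I/O through the MDS.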
 */
void
set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
		      u32 id)
{
	struct pnfs_layoutdriver_type *ld_type = NULL;

	if (id == 0)
		goto out_no_driver;
	if (!(server->nfs_client->cl_exchange_flags &
		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
		printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
			__func__, id, server->nfs_client->cl_exchange_flags);
		goto out_no_driver;
	}
	ld_type = find_pnfs_driver(id);
	if (!ld_type) {
		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
		ld_type = find_pnfs_driver(id);
		if (!ld_type) {
			dprintk("%s: No pNFS module found for %u.\n",
				__func__, id);
			goto out_no_driver;
		}
	}
	server->pnfs_curr_ld = ld_type;
	if (ld_type->set_layoutdriver
	    && ld_type->set_layoutdriver(server, mntfh)) {
		printk(KERN_ERR "NFS: %s: Error initializing pNFS layout "
			"driver %u.\n", __func__, id);
		module_put(ld_type->owner);
		goto out_no_driver;
	}
	/* Bump the MDS count */
	atomic_inc(&server->nfs_client->cl_mds_count);

	dprintk("%s: pNFS module for %u set\n", __func__, id);
	return;

out_no_driver:
	dprintk("%s: Using NFSv4 I/O\n", __func__);
	server->pnfs_curr_ld = NULL;
}

int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	int status = -EINVAL;
	struct pnfs_layoutdriver_type *tmp;

	if (ld_type->id == 0) {
		printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__);
		return status;
	}
	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
		printk(KERN_ERR "NFS: %s Layout driver must provide "
		       "alloc_lseg and free_lseg.\n", __func__);
		return status;
	}

	spin_lock(&pnfs_spinlock);
	tmp = find_pnfs_driver_locked(ld_type->id);
	if (!tmp) {
		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
		status = 0;
		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
			ld_type->name);
	} else {
		printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n",
			__func__, ld_type->id);
	}
	spin_unlock(&pnfs_spinlock);

	return status;
}
EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);

void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
	spin_lock(&pnfs_spinlock);
	list_del(&ld_type->pnfs_tblid);
	spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);

/*
 * pNFS client layout cache
 */

/* Need to hold i_lock if caller does not already hold reference */
void
pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
{
	atomic_inc(&lo->plh_refcount);
}

static struct pnfs_layout_hdr *
pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
	return ld->alloc_layout_hdr(ino, gfp_flags);
}

static void
pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs_server *server = NFS_SERVER(lo->plh_inode);
	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

	if (!list_empty(&lo->plh_layouts)) {
		struct nfs_client *clp = server->nfs_client;

		spin_lock(&clp->cl_lock);
		list_del_init(&lo->plh_layouts);
		spin_unlock(&clp->cl_lock);
	}
	put_rpccred(lo->plh_lc_cred);
	return ld->free_layout_hdr(lo);
}

static void
pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
	dprintk("%s: freeing layout cache %p\n", __func__, lo);
	nfsi->layout = NULL;
	/* Reset MDS Threshold I/O counters */
	nfsi->write_io = 0;
	nfsi->read_io = 0;
}

void
pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode = lo->plh_inode;

	pnfs_layoutreturn_before_put_layout_hdr(lo);

	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
		if (!list_empty(&lo->plh_segs))
			WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
		pnfs_detach_layout_hdr(lo);
		spin_unlock(&inode->i_lock);
		pnfs_free_layout_hdr(lo);
	}
}

/*
 * Mark a pnfs_layout_hdr and all associated layout segments as invalid
 *
 * In order to continue using the pnfs_layout_hdr, a full recovery
 * is required.
 * Note that caller must hold inode->i_lock.
 */
static int
pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
		struct list_head *lseg_list)
{
	struct pnfs_layout_range range = {
		.iomode = IOMODE_ANY,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};

	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
	return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0);
}

static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
	return iomode == IOMODE_RW ?
		NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
}

static void
pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
	lo->plh_retry_timestamp = jiffies;
	if (!test_and_set_bit(fail_bit, &lo->plh_flags))
		atomic_inc(&lo->plh_refcount);
}

static void
pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
{
	if (test_and_clear_bit(fail_bit, &lo->plh_flags))
		atomic_dec(&lo->plh_refcount);
}

static void
pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
{
	struct inode *inode = lo->plh_inode;
	struct pnfs_layout_range range = {
		.iomode = iomode,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};
	LIST_HEAD(head);

	spin_lock(&inode->i_lock);
	pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
	pnfs_mark_matching_lsegs_invalid(lo, &head, &range, 0);
	spin_unlock(&inode->i_lock);
	pnfs_free_lseg_list(&head);
	dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
		iomode == IOMODE_RW ? "RW" : "READ");
}

static bool
pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
{
	unsigned long start, end;
	int fail_bit = pnfs_iomode_to_fail_bit(iomode);

	if (test_bit(fail_bit, &lo->plh_flags) == 0)
		return false;
	end = jiffies;
	start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT;
	if (!time_in_range(lo->plh_retry_timestamp, start, end)) {
		/* It is time to retry the failed layoutgets */
		pnfs_layout_clear_fail_bit(lo, fail_bit);
		return false;
	}
	return true;
}

static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
	INIT_LIST_HEAD(&lseg->pls_list);
	INIT_LIST_HEAD(&lseg->pls_lc_list);
	atomic_set(&lseg->pls_refcount, 1);
	smp_mb();
	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
	lseg->pls_layout = lo;
}

static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
{
	struct inode *ino = lseg->pls_layout->plh_inode;

	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
}

static void
pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_segment *lseg)
{
	struct inode *inode = lo->plh_inode;

	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	list_del_init(&lseg->pls_list);
	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
	atomic_dec(&lo->plh_refcount);
	if (list_empty(&lo->plh_segs))
		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
}

void
pnfs_put_lseg(struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_hdr *lo;
	struct inode *inode;

	if (!lseg)
		return;

	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
		atomic_read(&lseg->pls_refcount),
		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));

	lo = lseg->pls_layout;
	inode = lo->plh_inode;

	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
			spin_unlock(&inode->i_lock);
			return;
		}
		pnfs_get_layout_hdr(lo);
		pnfs_layout_remove_lseg(lo, lseg);
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg(lseg);
		pnfs_put_layout_hdr(lo);
	}
}
EXPORT_SYMBOL_GPL(pnfs_put_lseg);

static void pnfs_free_lseg_async_work(struct work_struct *work)
{
	struct pnfs_layout_segment *lseg;
	struct pnfs_layout_hdr *lo;

	lseg = container_of(work, struct pnfs_layout_segment, pls_work);
	lo = lseg->pls_layout;

	pnfs_free_lseg(lseg);
	pnfs_put_layout_hdr(lo);
}

static void pnfs_free_lseg_async(struct pnfs_layout_segment *lseg)
{
	INIT_WORK(&lseg->pls_work, pnfs_free_lseg_async_work);
	schedule_work(&lseg->pls_work);
}

void
pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg)
{
	if (!lseg)
		return;

	assert_spin_locked(&lseg->pls_layout->plh_inode->i_lock);

	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
		atomic_read(&lseg->pls_refcount),
		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	if (atomic_dec_and_test(&lseg->pls_refcount)) {
		struct pnfs_layout_hdr *lo = lseg->pls_layout;
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
			return;
		pnfs_get_layout_hdr(lo);
		pnfs_layout_remove_lseg(lo, lseg);
		pnfs_free_lseg_async(lseg);
	}
}
EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked);

static u64
end_offset(u64 start, u64 len)
{
	u64 end;

	end = start + len;
	return end >= start ? end : NFS4_MAX_UINT64;
}

/*
 * is l2 fully contained in l1?
 *   start1                             end1
 *   [----------------------------------)
 *           start2           end2
 *           [----------------)
 */
static bool
pnfs_lseg_range_contained(const struct pnfs_layout_range *l1,
		 const struct pnfs_layout_range *l2)
{
	u64 start1 = l1->offset;
	u64 end1 = end_offset(start1, l1->length);
	u64 start2 = l2->offset;
	u64 end2 = end_offset(start2, l2->length);

	return (start1 <= start2) && (end1 >= end2);
}

/*
 * do l1 and l2 intersect?
 *   start1                             end1
 *   [----------------------------------)
 *                              start2           end2
 *                              [----------------)
 */
static bool
pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1,
		    const struct pnfs_layout_range *l2)
{
	u64 start1 = l1->offset;
	u64 end1 = end_offset(start1, l1->length);
	u64 start2 = l2->offset;
	u64 end2 = end_offset(start2, l2->length);

	return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
	       (end2 == NFS4_MAX_UINT64 || end2 > start1);
}

static bool
should_free_lseg(const struct pnfs_layout_range *lseg_range,
		 const struct pnfs_layout_range *recall_range)
{
	return (recall_range->iomode == IOMODE_ANY ||
		lseg_range->iomode == recall_range->iomode) &&
	       pnfs_lseg_range_intersecting(lseg_range, recall_range);
}

static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
		struct list_head *tmp_list)
{
	if (!atomic_dec_and_test(&lseg->pls_refcount))
		return false;
	pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
	list_add(&lseg->pls_list, tmp_list);
	return true;
}

/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
			     struct list_head *tmp_list)
{
	int rv = 0;

	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
		/* Remove the reference keeping the lseg in the
		 * list.  It will now be removed when all
		 * outstanding io is finished.
		 */
		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
			atomic_read(&lseg->pls_refcount));
		if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
			rv = 1;
	}
	return rv;
}

/*
 * Compare 2 layout stateid sequence ids, to see which is newer,
 * taking into account wraparound issues.
 */
static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
{
	return (s32)(s1 - s2) > 0;
}

/**
 * pnfs_mark_matching_lsegs_invalid - tear down lsegs or mark them for later
 * @lo: layout header containing the lsegs
 * @tmp_list: list head where doomed lsegs should go
 * @recall_range: optional recall range argument to match (may be NULL)
 * @seq: only invalidate lsegs obtained prior to this sequence (may be 0)
 *
 * Walk the list of lsegs in the layout header, and tear down any that should
 * be destroyed. If "recall_range" is specified then the segment must match
 * that range. If "seq" is non-zero, then only match segments that were handed
 * out at or before that sequence.
 *
 * Returns number of matching invalid lsegs remaining in list after scanning
 * it and purging them.
 */
int
pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
			    struct list_head *tmp_list,
			    const struct pnfs_layout_range *recall_range,
			    u32 seq)
{
	struct pnfs_layout_segment *lseg, *next;
	int remaining = 0;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	if (list_empty(&lo->plh_segs))
		return 0;
	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		if (!recall_range ||
		    should_free_lseg(&lseg->pls_range, recall_range)) {
			if (seq && pnfs_seqid_is_newer(lseg->pls_seq, seq))
				continue;
			dprintk("%s: freeing lseg %p iomode %d seq %u "
				"offset %llu length %llu\n", __func__,
				lseg, lseg->pls_range.iomode, lseg->pls_seq,
				lseg->pls_range.offset, lseg->pls_range.length);
			if (!mark_lseg_invalid(lseg, tmp_list))
				remaining++;
		}
	dprintk("%s:Return %i\n", __func__, remaining);
	return remaining;
}

/* note free_me must contain lsegs from a single layout_hdr */
void
pnfs_free_lseg_list(struct list_head *free_me)
{
	struct pnfs_layout_segment *lseg, *tmp;

	if (list_empty(free_me))
		return;

	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
		list_del(&lseg->pls_list);
		pnfs_free_lseg(lseg);
	}
}

void
pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&nfsi->vfs_inode.i_lock);
	lo = nfsi->layout;
	if (lo) {
		pnfs_get_layout_hdr(lo);
		pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
		pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
		spin_unlock(&nfsi->vfs_inode.i_lock);
		pnfs_free_lseg_list(&tmp_list);
		pnfs_put_layout_hdr(lo);
	} else
		spin_unlock(&nfsi->vfs_inode.i_lock);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);

static bool
pnfs_layout_add_bulk_destroy_list(struct inode *inode,
		struct list_head *layout_list)
{
	struct pnfs_layout_hdr *lo;
	bool ret = false;

	spin_lock(&inode->i_lock);
	lo = NFS_I(inode)->layout;
	if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
		pnfs_get_layout_hdr(lo);
		list_add(&lo->plh_bulk_destroy, layout_list);
		ret = true;
	}
	spin_unlock(&inode->i_lock);
	return ret;
}

/* Caller must hold rcu_read_lock and clp->cl_lock */
static int
pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
		struct nfs_server *server,
		struct list_head *layout_list)
{
	struct pnfs_layout_hdr *lo, *next;
	struct inode *inode;

	list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
		inode = igrab(lo->plh_inode);
		if (inode == NULL)
			continue;
		list_del_init(&lo->plh_layouts);
		if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
			continue;
		rcu_read_unlock();
		spin_unlock(&clp->cl_lock);
		iput(inode);
		spin_lock(&clp->cl_lock);
		rcu_read_lock();
		return -EAGAIN;
	}
	return 0;
}

static int
pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
		bool is_bulk_recall)
{
	struct pnfs_layout_hdr *lo;
	struct inode *inode;
	LIST_HEAD(lseg_list);
	int ret = 0;

	while (!list_empty(layout_list)) {
		lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
				plh_bulk_destroy);
		dprintk("%s freeing layout for inode %lu\n", __func__,
			lo->plh_inode->i_ino);
		inode = lo->plh_inode;

		pnfs_layoutcommit_inode(inode,
				false);

		spin_lock(&inode->i_lock);
		list_del_init(&lo->plh_bulk_destroy);
		if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
			if (is_bulk_recall)
				set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
			ret = -EAGAIN;
		}
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg_list(&lseg_list);
		/* Free all lsegs that are attached to commit buckets */
		nfs_commit_inode(inode, 0);
		pnfs_put_layout_hdr(lo);
		iput(inode);
	}
	return ret;
}

int
pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
		struct nfs_fsid *fsid,
		bool is_recall)
{
	struct nfs_server *server;
	LIST_HEAD(layout_list);

	spin_lock(&clp->cl_lock);
	rcu_read_lock();
restart:
	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
		if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
			continue;
		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
					server,
					&layout_list) != 0)
			goto restart;
	}
	rcu_read_unlock();
	spin_unlock(&clp->cl_lock);

	if (list_empty(&layout_list))
		return 0;
	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}

int
pnfs_destroy_layouts_byclid(struct nfs_client *clp,
		bool is_recall)
{
	struct nfs_server *server;
	LIST_HEAD(layout_list);

	spin_lock(&clp->cl_lock);
	rcu_read_lock();
restart:
	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
		if (pnfs_layout_bulk_destroy_byserver_locked(clp,
					server,
					&layout_list) != 0)
			goto restart;
	}
	rcu_read_unlock();
	spin_unlock(&clp->cl_lock);

	if (list_empty(&layout_list))
		return 0;
	return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}

/*
 * Called by the state manager to remove all layouts established under an
 * expired lease.
 */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
	nfs4_deviceid_mark_client_invalid(clp);
	nfs4_deviceid_purge_client(clp);

	pnfs_destroy_layouts_byclid(clp, false);
}

/* update lo->plh_stateid with new if it is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
			bool update_barrier)
{
	u32 oldseq, newseq, new_barrier;
	int empty = list_empty(&lo->plh_segs);

	oldseq = be32_to_cpu(lo->plh_stateid.seqid);
	newseq = be32_to_cpu(new->seqid);
	if (empty || pnfs_seqid_is_newer(newseq, oldseq)) {
		nfs4_stateid_copy(&lo->plh_stateid, new);
		if (update_barrier) {
			new_barrier = be32_to_cpu(new->seqid);
		} else {
			/* Because of wraparound, we want to keep the barrier
			 * "close" to the current seqids.
			 */
			new_barrier = newseq - atomic_read(&lo->plh_outstanding);
		}
		if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
			lo->plh_barrier = new_barrier;
	}
}

static bool
pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
		const nfs4_stateid *stateid)
{
	u32 seqid = be32_to_cpu(stateid->seqid);

	return !pnfs_seqid_is_newer(seqid, lo->plh_barrier);
}

/* Return true if new layoutgets are currently blocked for this layout */
static bool
pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
{
	return lo->plh_block_lgets ||
		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
}

/*
 * Get layout from server.
 *    for now, assume that whole file layouts are requested.
 *    arg->offset: 0
 *    arg->length: all ones
 */
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
	   struct nfs_open_context *ctx,
	   nfs4_stateid *stateid,
	   const struct pnfs_layout_range *range,
	   long *timeout, gfp_t gfp_flags)
{
	struct inode *ino = lo->plh_inode;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs4_layoutget *lgp;
	loff_t i_size;

	dprintk("--> %s\n", __func__);

	/*
	 * Synchronously retrieve layout information from server and
	 * store in lseg. If we race with a concurrent seqid morphing
	 * op, then re-send the LAYOUTGET.
	 */
	lgp = kzalloc(sizeof(*lgp), gfp_flags);
	if (lgp == NULL)
		return ERR_PTR(-ENOMEM);

	i_size = i_size_read(ino);

	lgp->args.minlength = PAGE_SIZE;
	if (lgp->args.minlength > range->length)
		lgp->args.minlength = range->length;
	if (range->iomode == IOMODE_READ) {
		if (range->offset >= i_size)
			lgp->args.minlength = 0;
		else if (i_size - range->offset < lgp->args.minlength)
			lgp->args.minlength = i_size - range->offset;
	}
	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
	pnfs_copy_range(&lgp->args.range, range);
	lgp->args.type = server->pnfs_curr_ld->id;
	lgp->args.inode = ino;
	lgp->args.ctx = get_nfs_open_context(ctx);
	nfs4_stateid_copy(&lgp->args.stateid, stateid);
	lgp->gfp_flags = gfp_flags;
	lgp->cred = lo->plh_lc_cred;

	return nfs4_proc_layoutget(lgp, timeout, gfp_flags);
}

static void pnfs_clear_layoutcommit(struct inode *inode,
		struct list_head *head)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct pnfs_layout_segment *lseg, *tmp;

	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
		return;
	list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
		if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
			continue;
		pnfs_lseg_dec_and_remove_zero(lseg, head);
	}
}

void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
{
	clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
	smp_mb__after_atomic();
	wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
}

static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
{
	if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
		return false;
	lo->plh_return_iomode = 0;
	lo->plh_return_seq = 0;
	pnfs_get_layout_hdr(lo);
	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
	return true;
}

static int
pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
		       enum pnfs_iomode iomode, bool sync)
{
	struct inode *ino = lo->plh_inode;
	struct nfs4_layoutreturn *lrp;
	int status = 0;

	lrp = kzalloc(sizeof(*lrp), GFP_NOFS);
	if (unlikely(lrp == NULL)) {
		status = -ENOMEM;
		spin_lock(&ino->i_lock);
		pnfs_clear_layoutreturn_waitbit(lo);
		spin_unlock(&ino->i_lock);
		pnfs_put_layout_hdr(lo);
		goto out;
	}

	nfs4_stateid_copy(&lrp->args.stateid, stateid);
	lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
	lrp->args.inode = ino;
	lrp->args.range.iomode = iomode;
	lrp->args.range.offset = 0;
	lrp->args.range.length = NFS4_MAX_UINT64;
	lrp->args.layout = lo;
	lrp->clp = NFS_SERVER(ino)->nfs_client;
	lrp->cred = lo->plh_lc_cred;

	status = nfs4_proc_layoutreturn(lrp, sync);
out:
	dprintk("<-- %s status: %d\n", __func__, status);
	return status;
}

/* Return true if layoutreturn is needed */
static bool
pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
{
	struct pnfs_layout_segment *s;

	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
		return false;

	/* Defer layoutreturn until all lsegs are done */
	list_for_each_entry(s, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
			return false;
	}

	return true;
}

static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode = lo->plh_inode;

	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
		return;
	spin_lock(&inode->i_lock);
	if (pnfs_layout_need_return(lo)) {
		nfs4_stateid stateid;
		enum pnfs_iomode iomode;
		bool send;

		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
		stateid.seqid = cpu_to_be32(lo->plh_return_seq);
		iomode = lo->plh_return_iomode;
		send = pnfs_prepare_layoutreturn(lo);
		spin_unlock(&inode->i_lock);
		if (send) {
			/* Send an async layoutreturn so we don't deadlock */
			pnfs_send_layoutreturn(lo, &stateid, iomode, false);
		}
	} else
		spin_unlock(&inode->i_lock);
}

/*
 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
 * when the layout segment list is empty.
 *
 * Note that a pnfs_layout_hdr can exist with an empty layout segment
 * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the
 * deviceid is marked invalid.
 */
int
_pnfs_return_layout(struct inode *ino)
{
	struct pnfs_layout_hdr *lo = NULL;
	struct nfs_inode *nfsi = NFS_I(ino);
	LIST_HEAD(tmp_list);
	nfs4_stateid stateid;
	int status = 0, empty;
	bool send;

	dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);

	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	if (!lo) {
		spin_unlock(&ino->i_lock);
		dprintk("NFS: %s no layout to return\n", __func__);
		goto out;
	}
	nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid);
	/* Reference matched in nfs4_layoutreturn_release */
	pnfs_get_layout_hdr(lo);
	empty = list_empty(&lo->plh_segs);
	pnfs_clear_layoutcommit(ino, &tmp_list);
	pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0);

	if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
		struct pnfs_layout_range range = {
			.iomode = IOMODE_ANY,
			.offset = 0,
			.length = NFS4_MAX_UINT64,
		};
		NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
	}

	/* Don't send a LAYOUTRETURN if list was initially empty */
	if (empty) {
		spin_unlock(&ino->i_lock);
		dprintk("NFS: %s no layout segments to return\n", __func__);
		goto out_put_layout_hdr;
	}

	set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
	send = pnfs_prepare_layoutreturn(lo);
	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&tmp_list);
	if (send)
		status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
out_put_layout_hdr:
	pnfs_put_layout_hdr(lo);
out:
	dprintk("<-- %s status: %d\n", __func__, status);
	return status;
}
EXPORT_SYMBOL_GPL(_pnfs_return_layout);

int
pnfs_commit_and_return_layout(struct inode *inode)
{
	struct pnfs_layout_hdr *lo;
	int ret;

	spin_lock(&inode->i_lock);
	lo = NFS_I(inode)->layout;
	if (lo == NULL) {
		spin_unlock(&inode->i_lock);
		return 0;
	}
	pnfs_get_layout_hdr(lo);
	/* Block new layoutgets and read/write to ds */
	lo->plh_block_lgets++;
	spin_unlock(&inode->i_lock);
	filemap_fdatawait(inode->i_mapping);
	ret = pnfs_layoutcommit_inode(inode, true);
	if (ret == 0)
		ret = _pnfs_return_layout(inode);
	spin_lock(&inode->i_lock);
	lo->plh_block_lgets--;
	spin_unlock(&inode->i_lock);
	pnfs_put_layout_hdr(lo);
	return ret;
}

bool pnfs_roc(struct inode *ino)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct nfs_open_context *ctx;
	struct nfs4_state *state;
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg, *tmp;
	nfs4_stateid stateid;
	LIST_HEAD(tmp_list);
	bool found = false, layoutreturn = false, roc = false;

	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	if (!lo || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
		goto out_noroc;

	/* no roc if we hold a delegation */
	if (nfs4_check_delegation(ino, FMODE_READ))
		goto out_noroc;

	list_for_each_entry(ctx, &nfsi->open_files, list) {
		state = ctx->state;
		/* Don't return layout if there is open file state */
		if (state != NULL && state->state != 0)
			goto out_noroc;
	}

	nfs4_stateid_copy(&stateid, &lo->plh_stateid);
	/* always send layoutreturn if being marked so */
	if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED,
				&lo->plh_flags))
		layoutreturn = pnfs_prepare_layoutreturn(lo);

	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
		/* If we are sending layoutreturn, invalidate all valid lsegs */
		if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			mark_lseg_invalid(lseg, &tmp_list);
			found = true;
		}
	/* ROC in two conditions:
	 * 1. there are ROC lsegs
	 * 2. we don't send layoutreturn
	 */
	if (found && !layoutreturn) {
		/* lo ref dropped in pnfs_roc_release() */
		pnfs_get_layout_hdr(lo);
		roc = true;
	}

out_noroc:
	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&tmp_list);
	pnfs_layoutcommit_inode(ino, true);
	if (layoutreturn)
		pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
	return roc;
}

void pnfs_roc_release(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	pnfs_clear_layoutreturn_waitbit(lo);
	if (atomic_dec_and_test(&lo->plh_refcount)) {
		pnfs_detach_layout_hdr(lo);
		spin_unlock(&ino->i_lock);
		pnfs_free_layout_hdr(lo);
	} else
		spin_unlock(&ino->i_lock);
}

void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	pnfs_mark_layout_returned_if_empty(lo);
	if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
		lo->plh_barrier = barrier;
	spin_unlock(&ino->i_lock);
	trace_nfs4_layoutreturn_on_close(ino, 0);
}

void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *lo;
	u32 current_seqid;

	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	current_seqid = be32_to_cpu(lo->plh_stateid.seqid);

	/* Since close does not return a layout stateid for use as
	 * a barrier, we choose the worst-case barrier.
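	 * That is the current layout stateid seqid plus the number of
	 * outstanding LAYOUTGETs, since each outstanding LAYOUTGET may
	 * bump the seqid by at most one.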
	 */
	*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
	spin_unlock(&ino->i_lock);
}

bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *lo;
	bool sleep = false;

	/* We might not have grabbed a reference to lo, so we need to
	 * check under the i_lock.
	 */
	spin_lock(&ino->i_lock);
	lo = nfsi->layout;
	if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
		sleep = true;
	spin_unlock(&ino->i_lock);

	if (sleep)
		rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);

	return sleep;
}

/*
 * Compare two layout segments for sorting into layout cache.
 * We want to preferentially return RW over RO layouts, so ensure those
 * are seen first.
 */
static s64
pnfs_lseg_range_cmp(const struct pnfs_layout_range *l1,
		    const struct pnfs_layout_range *l2)
{
	s64 d;

	/* high offset > low offset */
	d = l1->offset - l2->offset;
	if (d)
		return d;

	/* short length > long length */
	d = l2->length - l1->length;
	if (d)
		return d;

	/* read > read/write */
	return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
}

static bool
pnfs_lseg_range_is_after(const struct pnfs_layout_range *l1,
		const struct pnfs_layout_range *l2)
{
	return pnfs_lseg_range_cmp(l1, l2) > 0;
}

static bool
pnfs_lseg_no_merge(struct pnfs_layout_segment *lseg,
		struct pnfs_layout_segment *old)
{
	return false;
}

void
pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo,
		   struct pnfs_layout_segment *lseg,
		   bool (*is_after)(const struct pnfs_layout_range *,
			   const struct pnfs_layout_range *),
		   bool (*do_merge)(struct pnfs_layout_segment *,
			   struct pnfs_layout_segment *),
		   struct list_head *free_me)
{
	struct pnfs_layout_segment *lp, *tmp;

	dprintk("%s:Begin\n", __func__);

	list_for_each_entry_safe(lp, tmp, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_VALID, &lp->pls_flags) == 0)
			continue;
		if (do_merge(lseg, lp)) {
			mark_lseg_invalid(lp, free_me);
			continue;
		}
		if (is_after(&lseg->pls_range, &lp->pls_range))
			continue;
		list_add_tail(&lseg->pls_list, &lp->pls_list);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu before "
			"lp %p iomode %d offset %llu length %llu\n",
			__func__, lseg, lseg->pls_range.iomode,
			lseg->pls_range.offset, lseg->pls_range.length,
			lp, lp->pls_range.iomode, lp->pls_range.offset,
			lp->pls_range.length);
		goto out;
	}
	list_add_tail(&lseg->pls_list, &lo->plh_segs);
	dprintk("%s: inserted lseg %p "
		"iomode %d offset %llu length %llu at tail\n",
		__func__, lseg, lseg->pls_range.iomode,
		lseg->pls_range.offset, lseg->pls_range.length);
out:
	pnfs_get_layout_hdr(lo);

	dprintk("%s:Return\n", __func__);
}
EXPORT_SYMBOL_GPL(pnfs_generic_layout_insert_lseg);

static void
pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
		   struct pnfs_layout_segment *lseg,
		   struct list_head *free_me)
{
	struct inode *inode = lo->plh_inode;
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;

	if (ld->add_lseg != NULL)
		ld->add_lseg(lo, lseg, free_me);
	else
		pnfs_generic_layout_insert_lseg(lo, lseg,
				pnfs_lseg_range_is_after,
				pnfs_lseg_no_merge,
				free_me);
}

static struct pnfs_layout_hdr *
alloc_init_layout_hdr(struct inode *ino,
		      struct nfs_open_context *ctx,
		      gfp_t gfp_flags)
{
	struct pnfs_layout_hdr *lo;

	lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
	if (!lo)
		return NULL;
	atomic_set(&lo->plh_refcount, 1);
	INIT_LIST_HEAD(&lo->plh_layouts);
	INIT_LIST_HEAD(&lo->plh_segs);
	INIT_LIST_HEAD(&lo->plh_bulk_destroy);
	lo->plh_inode = ino;
	lo->plh_lc_cred = get_rpccred(ctx->cred);
	return lo;
}

static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino,
		       struct nfs_open_context *ctx,
		       gfp_t gfp_flags)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *new = NULL;

	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);

	if (nfsi->layout != NULL)
		goto out_existing;
	spin_unlock(&ino->i_lock);
	new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
	spin_lock(&ino->i_lock);

	if (likely(nfsi->layout == NULL)) {	/* Won the race? */
		nfsi->layout = new;
		return new;
	} else if (new != NULL)
		pnfs_free_layout_hdr(new);
out_existing:
	pnfs_get_layout_hdr(nfsi->layout);
	return nfsi->layout;
}

/*
 * iomode matching rules:
 * iomode	lseg	strict match
 *			iomode
 * -----	-----	------	-----
 * ANY		READ	N/A	true
 * ANY		RW	N/A	true
 * RW		READ	N/A	false
 * RW		RW	N/A	true
 * READ		READ	N/A	true
 * READ		RW	true	false
 * READ		RW	false	true
 */
static bool
pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range,
		 const struct pnfs_layout_range *range,
		 bool strict_iomode)
{
	struct pnfs_layout_range range1;

	if ((range->iomode == IOMODE_RW &&
	     ls_range->iomode != IOMODE_RW) ||
	    (range->iomode != ls_range->iomode &&
	     strict_iomode == true) ||
	    !pnfs_lseg_range_intersecting(ls_range, range))
		return 0;

	/* range1 covers only the first byte in the range */
	range1 = *range;
	range1.length = 1;
	return pnfs_lseg_range_contained(ls_range, &range1);
}

/*
 * lookup range in layout
 */
static struct pnfs_layout_segment *
pnfs_find_lseg(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_range *range,
		bool strict_iomode)
{
	struct pnfs_layout_segment *lseg, *ret = NULL;

	dprintk("%s:Begin\n", __func__);

	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
		    !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) &&
		    pnfs_lseg_range_match(&lseg->pls_range, range,
					  strict_iomode)) {
			ret = pnfs_get_lseg(lseg);
			break;
		}
	}

	dprintk("%s:Return lseg %p ref %d\n",
		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
	return ret;
}

/*
 * Use mdsthreshold hints set at each OPEN to determine if I/O should go
 * to the MDS or over pNFS
 *
 * The nfs_inode read_io and write_io fields are cumulative counters reset
 * when there are no layout segments. Note that in pnfs_update_layout iomode
 * is set to IOMODE_READ for a READ request, and set to IOMODE_RW for a
 * WRITE request.
 *
 * A return of true means use MDS I/O.
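 *
 * For example, if only the read file size threshold (THRESHOLD_RD) is set
 * and the file is smaller than rd_sz, reads are sent to the MDS.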
 *
 * From rfc 5661:
 * If a file's size is smaller than the file size threshold, data accesses
 * SHOULD be sent to the metadata server. If an I/O request has a length that
 * is below the I/O size threshold, the I/O SHOULD be sent to the metadata
 * server. If both file size and I/O size are provided, the client SHOULD
 * reach or exceed both thresholds before sending its read or write
 * requests to the data server.
 */
static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
				     struct inode *ino, int iomode)
{
	struct nfs4_threshold *t = ctx->mdsthreshold;
	struct nfs_inode *nfsi = NFS_I(ino);
	loff_t fsize = i_size_read(ino);
	bool size = false, size_set = false, io = false, io_set = false, ret = false;

	if (t == NULL)
		return ret;

	dprintk("%s bm=0x%x rd_sz=%llu wr_sz=%llu rd_io=%llu wr_io=%llu\n",
		__func__, t->bm, t->rd_sz, t->wr_sz, t->rd_io_sz, t->wr_io_sz);

	switch (iomode) {
	case IOMODE_READ:
		if (t->bm & THRESHOLD_RD) {
			dprintk("%s fsize %llu\n", __func__, fsize);
			size_set = true;
			if (fsize < t->rd_sz)
				size = true;
		}
		if (t->bm & THRESHOLD_RD_IO) {
			dprintk("%s nfsi->read_io %llu\n", __func__,
				nfsi->read_io);
			io_set = true;
			if (nfsi->read_io < t->rd_io_sz)
				io = true;
		}
		break;
	case IOMODE_RW:
		if (t->bm & THRESHOLD_WR) {
			dprintk("%s fsize %llu\n", __func__, fsize);
			size_set = true;
			if (fsize < t->wr_sz)
				size = true;
		}
		if (t->bm & THRESHOLD_WR_IO) {
			dprintk("%s nfsi->write_io %llu\n", __func__,
				nfsi->write_io);
			io_set = true;
			if (nfsi->write_io < t->wr_io_sz)
				io = true;
		}
		break;
	}
	if (size_set && io_set) {
		if (size && io)
			ret = true;
	} else if (size || io)
		ret = true;

	dprintk("<-- %s size %d io %d ret %d\n", __func__, size, io, ret);
	return ret;
}

static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
{
	/*
	 * send layoutcommit as it can hold up layoutreturn due to lseg
	 * reference
	 */
	pnfs_layoutcommit_inode(lo->plh_inode, false);
	return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
				   nfs_wait_bit_killable,
				   TASK_UNINTERRUPTIBLE);
}

static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo)
{
	unsigned long *bitlock = &lo->plh_flags;

	clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock);
	smp_mb__after_atomic();
	wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET);
}

/*
 * Layout segment is retrieved from the server if not cached.
 * The appropriate layout segment is referenced and returned to the caller.
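 * The caller is responsible for dropping that reference via pnfs_put_lseg()
 * once it is done with the segment.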
 */
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
		   struct nfs_open_context *ctx,
		   loff_t pos,
		   u64 count,
		   enum pnfs_iomode iomode,
		   bool strict_iomode,
		   gfp_t gfp_flags)
{
	struct pnfs_layout_range arg = {
		.iomode = iomode,
		.offset = pos,
		.length = count,
	};
	unsigned pg_offset, seq;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs_client *clp = server->nfs_client;
	struct pnfs_layout_hdr *lo = NULL;
	struct pnfs_layout_segment *lseg = NULL;
	nfs4_stateid stateid;
	long timeout = 0;
	unsigned long giveup = jiffies + rpc_get_timeout(server->client);
	bool first;

	if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_NO_PNFS);
		goto out;
	}

	if (iomode == IOMODE_READ && i_size_read(ino) == 0) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_RD_ZEROLEN);
		goto out;
	}

	if (pnfs_within_mdsthreshold(ctx, ino, iomode)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_MDSTHRESH);
		goto out;
	}

lookup_again:
	first = false;
	spin_lock(&ino->i_lock);
	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
	if (lo == NULL) {
		spin_unlock(&ino->i_lock);
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_NOMEM);
		goto out;
	}

	/* Do we even need to bother with this? */
	if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_BULK_RECALL);
		dprintk("%s matches recall, use MDS\n", __func__);
		goto out_unlock;
	}

	/* if LAYOUTGET already failed once we don't try again */
	if (pnfs_layout_io_test_failed(lo, iomode)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
		goto out_unlock;
	}

	lseg = pnfs_find_lseg(lo, &arg, strict_iomode);
	if (lseg) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				PNFS_UPDATE_LAYOUT_FOUND_CACHED);
		goto out_unlock;
	}

	if (!nfs4_valid_open_stateid(ctx->state)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				PNFS_UPDATE_LAYOUT_INVALID_OPEN);
		goto out_unlock;
	}

	/*
	 * Choose a stateid for the LAYOUTGET. If we don't have a layout
	 * stateid, or it has been invalidated, then we must use the open
	 * stateid.
	 */
	if (lo->plh_stateid.seqid == 0 ||
	    test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {

		/*
		 * The first layoutget for the file. Need to serialize per
		 * RFC 5661 Errata 3208.
		 */
		if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET,
				     &lo->plh_flags)) {
			spin_unlock(&ino->i_lock);
			wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET,
				    TASK_UNINTERRUPTIBLE);
			pnfs_put_layout_hdr(lo);
			dprintk("%s retrying\n", __func__);
			goto lookup_again;
		}

		first = true;
		do {
			seq = read_seqbegin(&ctx->state->seqlock);
			nfs4_stateid_copy(&stateid, &ctx->state->stateid);
		} while (read_seqretry(&ctx->state->seqlock, seq));
	} else {
		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
	}

	/*
	 * Because we free lsegs before sending LAYOUTRETURN, we need to wait
	 * for LAYOUTRETURN even if first is true.
	 */
	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
		spin_unlock(&ino->i_lock);
		dprintk("%s wait for layoutreturn\n", __func__);
		if (pnfs_prepare_to_retry_layoutget(lo)) {
			if (first)
				pnfs_clear_first_layoutget(lo);
			pnfs_put_layout_hdr(lo);
			dprintk("%s retrying\n", __func__);
			trace_pnfs_update_layout(ino, pos, count, iomode, lo,
					lseg, PNFS_UPDATE_LAYOUT_RETRY);
			goto lookup_again;
		}
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				PNFS_UPDATE_LAYOUT_RETURN);
		goto out_put_layout_hdr;
	}

	if (pnfs_layoutgets_blocked(lo)) {
		trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				PNFS_UPDATE_LAYOUT_BLOCKED);
		goto out_unlock;
	}
	atomic_inc(&lo->plh_outstanding);
	spin_unlock(&ino->i_lock);

	if (list_empty(&lo->plh_layouts)) {
		/* The lo must be on the clp list if there is any
		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
		 */
		spin_lock(&clp->cl_lock);
		if (list_empty(&lo->plh_layouts))
			list_add_tail(&lo->plh_layouts, &server->layouts);
		spin_unlock(&clp->cl_lock);
	}

	pg_offset = arg.offset & ~PAGE_MASK;
	if (pg_offset) {
		arg.offset -= pg_offset;
		arg.length += pg_offset;
	}
	if (arg.length != NFS4_MAX_UINT64)
		arg.length = PAGE_ALIGN(arg.length);

	lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags);
	trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
				 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
	if (IS_ERR(lseg)) {
		switch (PTR_ERR(lseg)) {
		case -ERECALLCONFLICT:
			if (time_after(jiffies, giveup))
				lseg = NULL;
			/* Fallthrough */
		case -EAGAIN:
			pnfs_put_layout_hdr(lo);
			if (first)
				pnfs_clear_first_layoutget(lo);
			if (lseg) {
				trace_pnfs_update_layout(ino, pos, count,
					iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
				goto lookup_again;
			}
			/* Fallthrough */
		default:
			if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
				pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
				lseg = NULL;
			}
		}
	} else {
		pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
	}

	atomic_dec(&lo->plh_outstanding);
out_put_layout_hdr:
	if (first)
		pnfs_clear_first_layoutget(lo);
	pnfs_put_layout_hdr(lo);
out:
	dprintk("%s: inode %s/%llu pNFS layout segment %s for "
			"(%s, offset: %llu, length: %llu)\n",
			__func__, ino->i_sb->s_id,
			(unsigned long long)NFS_FILEID(ino),
			IS_ERR_OR_NULL(lseg) ? "not found" : "found",
			iomode == IOMODE_RW ? "read/write" : "read-only",
			(unsigned long long)pos,
			(unsigned long long)count);
	return lseg;
out_unlock:
	spin_unlock(&ino->i_lock);
	goto out_put_layout_hdr;
}
EXPORT_SYMBOL_GPL(pnfs_update_layout);

static bool
pnfs_sanity_check_layout_range(struct pnfs_layout_range *range)
{
	switch (range->iomode) {
	case IOMODE_READ:
	case IOMODE_RW:
		break;
	default:
		return false;
	}
	if (range->offset == NFS4_MAX_UINT64)
		return false;
	if (range->length == 0)
		return false;
	if (range->length != NFS4_MAX_UINT64 &&
	    range->length > NFS4_MAX_UINT64 - range->offset)
		return false;
	return true;
}

struct pnfs_layout_segment *
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
	struct nfs4_layoutget_res *res = &lgp->res;
	struct pnfs_layout_segment *lseg;
	struct inode *ino = lo->plh_inode;
	LIST_HEAD(free_me);

	if (!pnfs_sanity_check_layout_range(&res->range))
		return ERR_PTR(-EINVAL);

	/* Inject layout blob into I/O device driver */
	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
	if (IS_ERR_OR_NULL(lseg)) {
		if (!lseg)
			lseg = ERR_PTR(-ENOMEM);

		dprintk("%s: Could not allocate layout: error %ld\n",
		       __func__, PTR_ERR(lseg));
		return lseg;
	}

	init_lseg(lo, lseg);
	lseg->pls_range = res->range;
	lseg->pls_seq = be32_to_cpu(res->stateid.seqid);

	spin_lock(&ino->i_lock);
	if (pnfs_layoutgets_blocked(lo)) {
		dprintk("%s forget reply due to state\n", __func__);
		goto out_forget;
	}

	if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
		/* existing state ID, make sure the sequence number matches. */
		if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
			dprintk("%s forget reply due to sequence\n", __func__);
			goto out_forget;
		}
		pnfs_set_layout_stateid(lo, &res->stateid, false);
	} else {
		/*
		 * We got an entirely new state ID. Mark all segments for the
		 * inode invalid, and don't bother validating the stateid
		 * sequence number.
		 */
		pnfs_mark_matching_lsegs_invalid(lo, &free_me, NULL, 0);

		nfs4_stateid_copy(&lo->plh_stateid, &res->stateid);
		lo->plh_barrier = be32_to_cpu(res->stateid.seqid);
	}

	clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);

	pnfs_get_lseg(lseg);
	pnfs_layout_insert_lseg(lo, lseg, &free_me);

	if (res->return_on_close)
		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);

	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&free_me);
	return lseg;

out_forget:
	spin_unlock(&ino->i_lock);
	lseg->pls_layout = lo;
	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
	return ERR_PTR(-EAGAIN);
}

static void
pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
			 u32 seq)
{
	if (lo->plh_return_iomode == iomode)
		return;
	if (lo->plh_return_iomode != 0)
		iomode = IOMODE_ANY;
	lo->plh_return_iomode = iomode;
	set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
	if (!lo->plh_return_seq || pnfs_seqid_is_newer(seq, lo->plh_return_seq))
		lo->plh_return_seq = seq;
}

/**
 * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
 * @lo: pointer to layout header
 * @tmp_list: list header to be used with pnfs_free_lseg_list()
 * @return_range: describe layout segment ranges to be returned
 * @seq: stateid seqid to match
 *
 * This function is mainly intended for use by layoutrecall. It attempts
 * to free the layout segment immediately, or else to mark it for return
 * as soon as its reference count drops to zero.
 */
int
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
				struct list_head *tmp_list,
				const struct pnfs_layout_range *return_range,
				u32 seq)
{
	struct pnfs_layout_segment *lseg, *next;
	int remaining = 0;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	if (list_empty(&lo->plh_segs))
		return 0;

	assert_spin_locked(&lo->plh_inode->i_lock);

	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		if (should_free_lseg(&lseg->pls_range, return_range)) {
			dprintk("%s: marking lseg %p iomode %d "
				"offset %llu length %llu\n", __func__,
				lseg, lseg->pls_range.iomode,
				lseg->pls_range.offset,
				lseg->pls_range.length);
			if (mark_lseg_invalid(lseg, tmp_list))
				continue;
			remaining++;
			set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
		}

	if (remaining)
		pnfs_set_plh_return_info(lo, return_range->iomode, seq);

	return remaining;
}

void pnfs_error_mark_layout_for_return(struct inode *inode,
				       struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
	struct pnfs_layout_range range = {
		.iomode = lseg->pls_range.iomode,
		.offset = 0,
		.length = NFS4_MAX_UINT64,
	};
	LIST_HEAD(free_me);
	bool return_now = false;

	spin_lock(&inode->i_lock);
	pnfs_set_plh_return_info(lo, range.iomode, lseg->pls_seq);
	/*
	 * mark all matching lsegs so that we are sure to have no live
	 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
	 * for how it works.
	 */
	if (!pnfs_mark_matching_lsegs_return(lo, &free_me,
						&range, lseg->pls_seq)) {
		nfs4_stateid stateid;
		enum pnfs_iomode iomode = lo->plh_return_iomode;

		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
		return_now = pnfs_prepare_layoutreturn(lo);
		spin_unlock(&inode->i_lock);
		if (return_now)
			pnfs_send_layoutreturn(lo, &stateid, iomode, false);
	} else {
		spin_unlock(&inode->i_lock);
		nfs_commit_inode(inode, 0);
	}
	pnfs_free_lseg_list(&free_me);
}
EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);

void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
	u64 rd_size = req->wb_bytes;

	if (pgio->pg_lseg == NULL) {
		if (pgio->pg_dreq == NULL)
			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
		else
			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);

		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   req_offset(req),
						   rd_size,
						   IOMODE_READ,
						   false,
						   GFP_KERNEL);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to read through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_read_mds(pgio);

}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);

void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
			   struct nfs_page *req, u64 wb_size)
{
	if (pgio->pg_lseg == NULL) {
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   req_offset(req),
						   wb_size,
						   IOMODE_RW,
						   false,
						   GFP_NOFS);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to write through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_write_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);

void
pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc)
{
	if (desc->pg_lseg) {
		pnfs_put_lseg(desc->pg_lseg);
		desc->pg_lseg = NULL;
	}
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup);

/*
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
size_t
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
		     struct nfs_page *prev, struct nfs_page *req)
{
	unsigned int size;
	u64 seg_end, req_start, seg_left;

	size = nfs_generic_pg_test(pgio, prev, req);
	if (!size)
		return 0;

	/*
	 * 'size' contains the number of bytes left in the current page (up
	 * to the original size asked for in @req->wb_bytes).
	 *
	 * Calculate how many bytes are left in the layout segment
	 * and if there are fewer bytes than 'size', return that instead.
	 *
	 * Please also note that 'end_offset' is actually the offset of the
	 * first byte that lies outside the pnfs_layout_range. FIXME?
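	 * For example, a segment with offset 0 and length 4096 has an
	 * end_offset() of 4096, i.e. the offset of the byte just past it.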
void
pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
	u64 rd_size = req->wb_bytes;

	if (pgio->pg_lseg == NULL) {
		if (pgio->pg_dreq == NULL)
			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
		else
			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);

		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   req_offset(req),
						   rd_size,
						   IOMODE_READ,
						   false,
						   GFP_KERNEL);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to read through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_read_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);

void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
			   struct nfs_page *req, u64 wb_size)
{
	if (pgio->pg_lseg == NULL) {
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   req->wb_context,
						   req_offset(req),
						   wb_size,
						   IOMODE_RW,
						   false,
						   GFP_NOFS);
		if (IS_ERR(pgio->pg_lseg)) {
			pgio->pg_error = PTR_ERR(pgio->pg_lseg);
			pgio->pg_lseg = NULL;
			return;
		}
	}
	/* If no lseg, fall back to write through mds */
	if (pgio->pg_lseg == NULL)
		nfs_pageio_reset_write_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);

void
pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc)
{
	if (desc->pg_lseg) {
		pnfs_put_lseg(desc->pg_lseg);
		desc->pg_lseg = NULL;
	}
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup);

/*
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
size_t
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
		     struct nfs_page *prev, struct nfs_page *req)
{
	unsigned int size;
	u64 seg_end, req_start, seg_left;

	size = nfs_generic_pg_test(pgio, prev, req);
	if (!size)
		return 0;

	/*
	 * 'size' contains the number of bytes left in the current page (up
	 * to the original size asked for in @req->wb_bytes).
	 *
	 * Calculate how many bytes are left in the layout segment and
	 * if there are fewer bytes than 'size', return that instead.
	 *
	 * Please also note that 'end_offset' is actually the offset of the
	 * first byte that lies outside the pnfs_layout_range. FIXME?
	 */
	if (pgio->pg_lseg) {
		seg_end = end_offset(pgio->pg_lseg->pls_range.offset,
				     pgio->pg_lseg->pls_range.length);
		req_start = req_offset(req);
		WARN_ON_ONCE(req_start >= seg_end);
		/* start of request is past the last byte of this segment */
		if (req_start >= seg_end) {
			/* reference the new lseg */
			if (pgio->pg_ops->pg_cleanup)
				pgio->pg_ops->pg_cleanup(pgio);
			if (pgio->pg_ops->pg_init)
				pgio->pg_ops->pg_init(pgio, req);
			return 0;
		}

		/* adjust 'size' iff there are fewer bytes left in the
		 * segment than what nfs_generic_pg_test returned */
		seg_left = seg_end - req_start;
		if (seg_left < size)
			size = (unsigned int)seg_left;
	}

	return size;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);

int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
{
	struct nfs_pageio_descriptor pgio;

	/* Resend all requests through the MDS */
	nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
			      hdr->completion_ops);
	set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
	return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);

static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
{
	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
	    PNFS_LAYOUTRET_ON_ERROR) {
		pnfs_return_layout(hdr->inode);
	}
	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
		hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
}

/*
 * Called by non-RPC-based layout drivers on write completion.
 */
void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
{
	if (likely(!hdr->pnfs_error)) {
		pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
				      hdr->mds_offset + hdr->res.count);
		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
	}
	trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
	if (unlikely(hdr->pnfs_error))
		pnfs_ld_handle_write_error(hdr);
	hdr->mds_ops->rpc_release(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);

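/*
 * Fallback when the layout driver declines the write: requeue the pages on
 * the descriptor's current mirror and redo the I/O through the MDS.
 */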
static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
		       struct nfs_pgio_header *hdr)
{
	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
		nfs_pageio_reset_write_mds(desc);
		mirror->pg_recoalesce = 1;
	}
	nfs_pgio_data_destroy(hdr);
	hdr->release(hdr);
}

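/*
 * Call the appropriate parallel I/O subsystem write function.
 */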
static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
		       const struct rpc_call_ops *call_ops,
		       struct pnfs_layout_segment *lseg,
		       int how)
{
	struct inode *inode = hdr->inode;
	enum pnfs_try_status trypnfs;
	struct nfs_server *nfss = NFS_SERVER(inode);

	hdr->mds_ops = call_ops;

	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
		inode->i_ino, hdr->args.count, hdr->args.offset, how);
	trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
	if (trypnfs != PNFS_NOT_ATTEMPTED)
		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}

static void
pnfs_do_write(struct nfs_pageio_descriptor *desc,
	      struct nfs_pgio_header *hdr, int how)
{
	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
	struct pnfs_layout_segment *lseg = desc->pg_lseg;
	enum pnfs_try_status trypnfs;

	trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
	if (trypnfs == PNFS_NOT_ATTEMPTED)
		pnfs_write_through_mds(desc, hdr);
}

static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
{
	pnfs_put_lseg(hdr->lseg);
	nfs_pgio_header_free(hdr);
}

int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_pgio_header *hdr;
	int ret;

	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
	if (!hdr) {
		desc->pg_error = -ENOMEM;
		return desc->pg_error;
	}
	nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);

	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
	ret = nfs_generic_pgio(desc, hdr);
	if (!ret)
		pnfs_do_write(desc, hdr, desc->pg_ioflags);

	return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);

int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
{
	struct nfs_pageio_descriptor pgio;

	/* Resend all requests through the MDS */
	nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
	return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);

static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
{
	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
	    PNFS_LAYOUTRET_ON_ERROR) {
		pnfs_return_layout(hdr->inode);
	}
	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
		hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
}

/*
 * Called by non-RPC-based layout drivers on read completion.
 */
void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
{
	if (likely(!hdr->pnfs_error)) {
		__nfs4_read_done_cb(hdr);
		hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
	}
	trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
	if (unlikely(hdr->pnfs_error))
		pnfs_ld_handle_read_error(hdr);
	hdr->mds_ops->rpc_release(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);

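/*
 * Fallback when the layout driver declines the read: requeue the pages on
 * the descriptor's current mirror and redo the I/O through the MDS.
 */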
static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
		      struct nfs_pgio_header *hdr)
{
	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		list_splice_tail_init(&hdr->pages, &mirror->pg_list);
		nfs_pageio_reset_read_mds(desc);
		mirror->pg_recoalesce = 1;
	}
	nfs_pgio_data_destroy(hdr);
	hdr->release(hdr);
}

/*
 * Call the appropriate parallel I/O subsystem read function.
 */
static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
		      const struct rpc_call_ops *call_ops,
		      struct pnfs_layout_segment *lseg)
{
	struct inode *inode = hdr->inode;
	struct nfs_server *nfss = NFS_SERVER(inode);
	enum pnfs_try_status trypnfs;

	hdr->mds_ops = call_ops;

	dprintk("%s: Reading ino:%lu %u@%llu\n",
		__func__, inode->i_ino, hdr->args.count, hdr->args.offset);

	trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
	if (trypnfs != PNFS_NOT_ATTEMPTED)
		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}

/* Resend all requests through pnfs. */
void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr)
{
	struct nfs_pageio_descriptor pgio;

	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
		nfs_pageio_init_read(&pgio, hdr->inode, false,
				     hdr->completion_ops);
		hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr);
	}
}
EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs);

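/*
 * Issue the read through the layout driver; retry through pNFS on
 * PNFS_TRY_AGAIN, and fall back to the MDS if the driver did not attempt
 * the I/O or the pNFS resend failed.
 */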
static void
pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
{
	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
	struct pnfs_layout_segment *lseg = desc->pg_lseg;
	enum pnfs_try_status trypnfs;

	trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
	if (trypnfs == PNFS_TRY_AGAIN)
		pnfs_read_resend_pnfs(hdr);
	if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status)
		pnfs_read_through_mds(desc, hdr);
}

static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
{
	pnfs_put_lseg(hdr->lseg);
	nfs_pgio_header_free(hdr);
}

int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_pgio_header *hdr;
	int ret;

	hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
	if (!hdr) {
		desc->pg_error = -ENOMEM;
		return desc->pg_error;
	}
	nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
	hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
	ret = nfs_generic_pgio(desc, hdr);
	if (!ret)
		pnfs_do_read(desc, hdr);
	return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);

static void pnfs_clear_layoutcommitting(struct inode *inode)
{
	unsigned long *bitlock = &NFS_I(inode)->flags;

	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
	smp_mb__after_atomic();
	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
}

/*
 * There can be multiple RW segments.
 */
static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
{
	struct pnfs_layout_segment *lseg;

	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
		if (lseg->pls_range.iomode == IOMODE_RW &&
		    test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
			list_add(&lseg->pls_lc_list, listp);
	}
}

static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
{
	struct pnfs_layout_segment *lseg, *tmp;

	/* Matched by references in pnfs_set_layoutcommit */
	list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
		list_del_init(&lseg->pls_lc_list);
		pnfs_put_lseg(lseg);
	}

	pnfs_clear_layoutcommitting(inode);
}

void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
{
	pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
}
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);

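/*
 * Record that data was written through @lseg up to @end_pos so that a
 * subsequent LAYOUTCOMMIT covers it: the layout's last write byte is
 * updated, the segment is pinned until the commit completes, and the
 * first caller marks the inode dirty so the commit gets scheduled.
 */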
void
pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
		loff_t end_pos)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	bool mark_as_dirty = false;

	spin_lock(&inode->i_lock);
	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
		nfsi->layout->plh_lwb = end_pos;
		mark_as_dirty = true;
		dprintk("%s: Set layoutcommit for inode %lu ",
			__func__, inode->i_ino);
	} else if (end_pos > nfsi->layout->plh_lwb)
		nfsi->layout->plh_lwb = end_pos;
	if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
		/* references matched in nfs4_layoutcommit_release */
		pnfs_get_lseg(lseg);
	}
	spin_unlock(&inode->i_lock);
	dprintk("%s: lseg %p end_pos %llu\n",
		__func__, lseg, nfsi->layout->plh_lwb);

	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
	if (mark_as_dirty)
		mark_inode_dirty_sync(inode);
}
EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);

void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
{
	struct nfs_server *nfss = NFS_SERVER(data->args.inode);

	if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
		nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
	pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
}

/*
 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
 * data to disk to allow the server to recover the data if it crashes.
 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
 * is off, and a COMMIT is sent to a data server, or
 * if WRITEs to a data server return NFS_DATA_SYNC.
 */
int
pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
	struct nfs4_layoutcommit_data *data;
	struct nfs_inode *nfsi = NFS_I(inode);
	loff_t end_pos;
	int status;

	if (!pnfs_layoutcommit_outstanding(inode))
		return 0;

	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);

	status = -EAGAIN;
	if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
		if (!sync)
			goto out;
		status = wait_on_bit_lock_action(&nfsi->flags,
				NFS_INO_LAYOUTCOMMITTING,
				nfs_wait_bit_killable,
				TASK_KILLABLE);
		if (status)
			goto out;
	}

	status = -ENOMEM;
	/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
	data = kzalloc(sizeof(*data), GFP_NOFS);
	if (!data)
		goto clear_layoutcommitting;

	status = 0;
	spin_lock(&inode->i_lock);
	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
		goto out_unlock;

	INIT_LIST_HEAD(&data->lseg_list);
	pnfs_list_write_lseg(inode, &data->lseg_list);

	end_pos = nfsi->layout->plh_lwb;

	nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
	spin_unlock(&inode->i_lock);

	data->args.inode = inode;
	data->cred = get_rpccred(nfsi->layout->plh_lc_cred);
	nfs_fattr_init(&data->fattr);
	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
	data->res.fattr = &data->fattr;
	data->args.lastbytewritten = end_pos - 1;
	data->res.server = NFS_SERVER(inode);

	if (ld->prepare_layoutcommit) {
		status = ld->prepare_layoutcommit(&data->args);
		if (status) {
			put_rpccred(data->cred);
			spin_lock(&inode->i_lock);
			set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
			if (end_pos > nfsi->layout->plh_lwb)
				nfsi->layout->plh_lwb = end_pos;
			goto out_unlock;
		}
	}

	status = nfs4_proc_layoutcommit(data, sync);
out:
	if (status)
		mark_inode_dirty_sync(inode);
	dprintk("<-- %s status %d\n", __func__, status);
	return status;
out_unlock:
	spin_unlock(&inode->i_lock);
	kfree(data);
clear_layoutcommitting:
	pnfs_clear_layoutcommitting(inode);
	goto out;
}
EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);

int
pnfs_generic_sync(struct inode *inode, bool datasync)
{
	return pnfs_layoutcommit_inode(inode, true);
}
EXPORT_SYMBOL_GPL(pnfs_generic_sync);

struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
{
	struct nfs4_threshold *thp;

	thp = kzalloc(sizeof(*thp), GFP_NOFS);
	if (!thp) {
		dprintk("%s mdsthreshold allocation failed\n", __func__);
		return NULL;
	}
	return thp;
}

#if IS_ENABLED(CONFIG_NFS_V4_2)
int
pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
	struct nfs_server *server = NFS_SERVER(inode);
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs42_layoutstat_data *data;
	struct pnfs_layout_hdr *hdr;
	int status = 0;

	if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats)
		goto out;

	if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS))
		goto out;

	if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags))
		goto out;

	spin_lock(&inode->i_lock);
	if (!NFS_I(inode)->layout) {
		spin_unlock(&inode->i_lock);
		goto out_clear_layoutstats;
	}
	hdr = NFS_I(inode)->layout;
	pnfs_get_layout_hdr(hdr);
	spin_unlock(&inode->i_lock);

	data = kzalloc(sizeof(*data), gfp_flags);
	if (!data) {
		status = -ENOMEM;
		goto out_put;
	}

	data->args.fh = NFS_FH(inode);
	data->args.inode = inode;
	nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid);
	status = ld->prepare_layoutstats(&data->args);
	if (status)
		goto out_free;

	status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data);

out:
	dprintk("%s returns %d\n", __func__, status);
	return status;

out_free:
	kfree(data);
out_put:
	pnfs_put_layout_hdr(hdr);
out_clear_layoutstats:
	smp_mb__before_atomic();
	clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags);
	smp_mb__after_atomic();
	goto out;
}
EXPORT_SYMBOL_GPL(pnfs_report_layoutstat);
#endif

unsigned int layoutstats_timer;
module_param(layoutstats_timer, uint, 0644);
EXPORT_SYMBOL_GPL(layoutstats_timer);