1 /* 2 * linux/fs/nfs/blocklayout/blocklayout.c 3 * 4 * Module for the NFSv4.1 pNFS block layout driver. 5 * 6 * Copyright (c) 2006 The Regents of the University of Michigan. 7 * All rights reserved. 8 * 9 * Andy Adamson <andros@citi.umich.edu> 10 * Fred Isaman <iisaman@umich.edu> 11 * 12 * permission is granted to use, copy, create derivative works and 13 * redistribute this software and such derivative works for any purpose, 14 * so long as the name of the university of michigan is not used in 15 * any advertising or publicity pertaining to the use or distribution 16 * of this software without specific, written prior authorization. if 17 * the above copyright notice or any other identification of the 18 * university of michigan is included in any copy of any portion of 19 * this software, then the disclaimer below must also be included. 20 * 21 * this software is provided as is, without representation from the 22 * university of michigan as to its fitness for any purpose, and without 23 * warranty by the university of michigan of any kind, either express 24 * or implied, including without limitation the implied warranties of 25 * merchantability and fitness for a particular purpose. the regents 26 * of the university of michigan shall not be liable for any damages, 27 * including special, indirect, incidental, or consequential damages, 28 * with respect to any claim arising out or in connection with the use 29 * of the software, even if it has been or is hereafter advised of the 30 * possibility of such damages. 31 */ 32 33 #include <linux/module.h> 34 #include <linux/init.h> 35 #include <linux/mount.h> 36 #include <linux/namei.h> 37 #include <linux/bio.h> /* struct bio */ 38 #include <linux/buffer_head.h> /* various write calls */ 39 #include <linux/prefetch.h> 40 #include <linux/pagevec.h> 41 42 #include "../pnfs.h" 43 #include "../nfs4session.h" 44 #include "../internal.h" 45 #include "blocklayout.h" 46 47 #define NFSDBG_FACILITY NFSDBG_PNFS_LD 48 49 MODULE_LICENSE("GPL"); 50 MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); 51 MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); 52 53 static void print_page(struct page *page) 54 { 55 dprintk("PRINTPAGE page %p\n", page); 56 dprintk(" PagePrivate %d\n", PagePrivate(page)); 57 dprintk(" PageUptodate %d\n", PageUptodate(page)); 58 dprintk(" PageError %d\n", PageError(page)); 59 dprintk(" PageDirty %d\n", PageDirty(page)); 60 dprintk(" PageReferenced %d\n", PageReferenced(page)); 61 dprintk(" PageLocked %d\n", PageLocked(page)); 62 dprintk(" PageWriteback %d\n", PageWriteback(page)); 63 dprintk(" PageMappedToDisk %d\n", PageMappedToDisk(page)); 64 dprintk("\n"); 65 } 66 67 /* Given the be associated with isect, determine if page data needs to be 68 * initialized. 69 */ 70 static int is_hole(struct pnfs_block_extent *be, sector_t isect) 71 { 72 if (be->be_state == PNFS_BLOCK_NONE_DATA) 73 return 1; 74 else if (be->be_state != PNFS_BLOCK_INVALID_DATA) 75 return 0; 76 else 77 return !bl_is_sector_init(be->be_inval, isect); 78 } 79 80 /* Given the be associated with isect, determine if page data can be 81 * written to disk. 82 */ 83 static int is_writable(struct pnfs_block_extent *be, sector_t isect) 84 { 85 return (be->be_state == PNFS_BLOCK_READWRITE_DATA || 86 be->be_state == PNFS_BLOCK_INVALID_DATA); 87 } 88 89 /* The data we are handed might be spread across several bios. We need 90 * to track when the last one is finished. 91 */ 92 struct parallel_io { 93 struct kref refcnt; 94 void (*pnfs_callback) (void *data, int num_se); 95 void *data; 96 int bse_count; 97 }; 98 99 static inline struct parallel_io *alloc_parallel(void *data) 100 { 101 struct parallel_io *rv; 102 103 rv = kmalloc(sizeof(*rv), GFP_NOFS); 104 if (rv) { 105 rv->data = data; 106 kref_init(&rv->refcnt); 107 rv->bse_count = 0; 108 } 109 return rv; 110 } 111 112 static inline void get_parallel(struct parallel_io *p) 113 { 114 kref_get(&p->refcnt); 115 } 116 117 static void destroy_parallel(struct kref *kref) 118 { 119 struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); 120 121 dprintk("%s enter\n", __func__); 122 p->pnfs_callback(p->data, p->bse_count); 123 kfree(p); 124 } 125 126 static inline void put_parallel(struct parallel_io *p) 127 { 128 kref_put(&p->refcnt, destroy_parallel); 129 } 130 131 static struct bio * 132 bl_submit_bio(int rw, struct bio *bio) 133 { 134 if (bio) { 135 get_parallel(bio->bi_private); 136 dprintk("%s submitting %s bio %u@%llu\n", __func__, 137 rw == READ ? "read" : "write", 138 bio->bi_size, (unsigned long long)bio->bi_sector); 139 submit_bio(rw, bio); 140 } 141 return NULL; 142 } 143 144 static struct bio *bl_alloc_init_bio(int npg, sector_t isect, 145 struct pnfs_block_extent *be, 146 void (*end_io)(struct bio *, int err), 147 struct parallel_io *par) 148 { 149 struct bio *bio; 150 151 npg = min(npg, BIO_MAX_PAGES); 152 bio = bio_alloc(GFP_NOIO, npg); 153 if (!bio && (current->flags & PF_MEMALLOC)) { 154 while (!bio && (npg /= 2)) 155 bio = bio_alloc(GFP_NOIO, npg); 156 } 157 158 if (bio) { 159 bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; 160 bio->bi_bdev = be->be_mdev; 161 bio->bi_end_io = end_io; 162 bio->bi_private = par; 163 } 164 return bio; 165 } 166 167 static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, 168 sector_t isect, struct page *page, 169 struct pnfs_block_extent *be, 170 void (*end_io)(struct bio *, int err), 171 struct parallel_io *par, 172 unsigned int offset, int len) 173 { 174 isect = isect + (offset >> SECTOR_SHIFT); 175 dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, 176 npg, rw, (unsigned long long)isect, offset, len); 177 retry: 178 if (!bio) { 179 bio = bl_alloc_init_bio(npg, isect, be, end_io, par); 180 if (!bio) 181 return ERR_PTR(-ENOMEM); 182 } 183 if (bio_add_page(bio, page, len, offset) < len) { 184 bio = bl_submit_bio(rw, bio); 185 goto retry; 186 } 187 return bio; 188 } 189 190 static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, 191 sector_t isect, struct page *page, 192 struct pnfs_block_extent *be, 193 void (*end_io)(struct bio *, int err), 194 struct parallel_io *par) 195 { 196 return do_add_page_to_bio(bio, npg, rw, isect, page, be, 197 end_io, par, 0, PAGE_CACHE_SIZE); 198 } 199 200 /* This is basically copied from mpage_end_io_read */ 201 static void bl_end_io_read(struct bio *bio, int err) 202 { 203 struct parallel_io *par = bio->bi_private; 204 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 205 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 206 207 do { 208 struct page *page = bvec->bv_page; 209 210 if (--bvec >= bio->bi_io_vec) 211 prefetchw(&bvec->bv_page->flags); 212 if (uptodate) 213 SetPageUptodate(page); 214 } while (bvec >= bio->bi_io_vec); 215 if (!uptodate) { 216 struct nfs_read_data *rdata = par->data; 217 struct nfs_pgio_header *header = rdata->header; 218 219 if (!header->pnfs_error) 220 header->pnfs_error = -EIO; 221 pnfs_set_lo_fail(header->lseg); 222 } 223 bio_put(bio); 224 put_parallel(par); 225 } 226 227 static void bl_read_cleanup(struct work_struct *work) 228 { 229 struct rpc_task *task; 230 struct nfs_read_data *rdata; 231 dprintk("%s enter\n", __func__); 232 task = container_of(work, struct rpc_task, u.tk_work); 233 rdata = container_of(task, struct nfs_read_data, task); 234 pnfs_ld_read_done(rdata); 235 } 236 237 static void 238 bl_end_par_io_read(void *data, int unused) 239 { 240 struct nfs_read_data *rdata = data; 241 242 rdata->task.tk_status = rdata->header->pnfs_error; 243 INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); 244 schedule_work(&rdata->task.u.tk_work); 245 } 246 247 static enum pnfs_try_status 248 bl_read_pagelist(struct nfs_read_data *rdata) 249 { 250 struct nfs_pgio_header *header = rdata->header; 251 int i, hole; 252 struct bio *bio = NULL; 253 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 254 sector_t isect, extent_length = 0; 255 struct parallel_io *par; 256 loff_t f_offset = rdata->args.offset; 257 size_t bytes_left = rdata->args.count; 258 unsigned int pg_offset, pg_len; 259 struct page **pages = rdata->args.pages; 260 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; 261 const bool is_dio = (header->dreq != NULL); 262 263 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, 264 rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); 265 266 par = alloc_parallel(rdata); 267 if (!par) 268 goto use_mds; 269 par->pnfs_callback = bl_end_par_io_read; 270 /* At this point, we can no longer jump to use_mds */ 271 272 isect = (sector_t) (f_offset >> SECTOR_SHIFT); 273 /* Code assumes extents are page-aligned */ 274 for (i = pg_index; i < rdata->pages.npages; i++) { 275 if (!extent_length) { 276 /* We've used up the previous extent */ 277 bl_put_extent(be); 278 bl_put_extent(cow_read); 279 bio = bl_submit_bio(READ, bio); 280 /* Get the next one */ 281 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), 282 isect, &cow_read); 283 if (!be) { 284 header->pnfs_error = -EIO; 285 goto out; 286 } 287 extent_length = be->be_length - 288 (isect - be->be_f_offset); 289 if (cow_read) { 290 sector_t cow_length = cow_read->be_length - 291 (isect - cow_read->be_f_offset); 292 extent_length = min(extent_length, cow_length); 293 } 294 } 295 296 if (is_dio) { 297 pg_offset = f_offset & ~PAGE_CACHE_MASK; 298 if (pg_offset + bytes_left > PAGE_CACHE_SIZE) 299 pg_len = PAGE_CACHE_SIZE - pg_offset; 300 else 301 pg_len = bytes_left; 302 303 f_offset += pg_len; 304 bytes_left -= pg_len; 305 isect += (pg_offset >> SECTOR_SHIFT); 306 } else { 307 pg_offset = 0; 308 pg_len = PAGE_CACHE_SIZE; 309 } 310 311 hole = is_hole(be, isect); 312 if (hole && !cow_read) { 313 bio = bl_submit_bio(READ, bio); 314 /* Fill hole w/ zeroes w/o accessing device */ 315 dprintk("%s Zeroing page for hole\n", __func__); 316 zero_user_segment(pages[i], pg_offset, pg_len); 317 print_page(pages[i]); 318 SetPageUptodate(pages[i]); 319 } else { 320 struct pnfs_block_extent *be_read; 321 322 be_read = (hole && cow_read) ? cow_read : be; 323 bio = do_add_page_to_bio(bio, rdata->pages.npages - i, 324 READ, 325 isect, pages[i], be_read, 326 bl_end_io_read, par, 327 pg_offset, pg_len); 328 if (IS_ERR(bio)) { 329 header->pnfs_error = PTR_ERR(bio); 330 bio = NULL; 331 goto out; 332 } 333 } 334 isect += (pg_len >> SECTOR_SHIFT); 335 extent_length -= PAGE_CACHE_SECTORS; 336 } 337 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { 338 rdata->res.eof = 1; 339 rdata->res.count = header->inode->i_size - rdata->args.offset; 340 } else { 341 rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; 342 } 343 out: 344 bl_put_extent(be); 345 bl_put_extent(cow_read); 346 bl_submit_bio(READ, bio); 347 put_parallel(par); 348 return PNFS_ATTEMPTED; 349 350 use_mds: 351 dprintk("Giving up and using normal NFS\n"); 352 return PNFS_NOT_ATTEMPTED; 353 } 354 355 static void mark_extents_written(struct pnfs_block_layout *bl, 356 __u64 offset, __u32 count) 357 { 358 sector_t isect, end; 359 struct pnfs_block_extent *be; 360 struct pnfs_block_short_extent *se; 361 362 dprintk("%s(%llu, %u)\n", __func__, offset, count); 363 if (count == 0) 364 return; 365 isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT; 366 end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK); 367 end >>= SECTOR_SHIFT; 368 while (isect < end) { 369 sector_t len; 370 be = bl_find_get_extent(bl, isect, NULL); 371 BUG_ON(!be); /* FIXME */ 372 len = min(end, be->be_f_offset + be->be_length) - isect; 373 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 374 se = bl_pop_one_short_extent(be->be_inval); 375 BUG_ON(!se); 376 bl_mark_for_commit(be, isect, len, se); 377 } 378 isect += len; 379 bl_put_extent(be); 380 } 381 } 382 383 static void bl_end_io_write_zero(struct bio *bio, int err) 384 { 385 struct parallel_io *par = bio->bi_private; 386 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 387 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 388 389 do { 390 struct page *page = bvec->bv_page; 391 392 if (--bvec >= bio->bi_io_vec) 393 prefetchw(&bvec->bv_page->flags); 394 /* This is the zeroing page we added */ 395 end_page_writeback(page); 396 page_cache_release(page); 397 } while (bvec >= bio->bi_io_vec); 398 399 if (unlikely(!uptodate)) { 400 struct nfs_write_data *data = par->data; 401 struct nfs_pgio_header *header = data->header; 402 403 if (!header->pnfs_error) 404 header->pnfs_error = -EIO; 405 pnfs_set_lo_fail(header->lseg); 406 } 407 bio_put(bio); 408 put_parallel(par); 409 } 410 411 static void bl_end_io_write(struct bio *bio, int err) 412 { 413 struct parallel_io *par = bio->bi_private; 414 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 415 struct nfs_write_data *data = par->data; 416 struct nfs_pgio_header *header = data->header; 417 418 if (!uptodate) { 419 if (!header->pnfs_error) 420 header->pnfs_error = -EIO; 421 pnfs_set_lo_fail(header->lseg); 422 } 423 bio_put(bio); 424 put_parallel(par); 425 } 426 427 /* Function scheduled for call during bl_end_par_io_write, 428 * it marks sectors as written and extends the commitlist. 429 */ 430 static void bl_write_cleanup(struct work_struct *work) 431 { 432 struct rpc_task *task; 433 struct nfs_write_data *wdata; 434 dprintk("%s enter\n", __func__); 435 task = container_of(work, struct rpc_task, u.tk_work); 436 wdata = container_of(task, struct nfs_write_data, task); 437 if (likely(!wdata->header->pnfs_error)) { 438 /* Marks for LAYOUTCOMMIT */ 439 mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), 440 wdata->args.offset, wdata->args.count); 441 } 442 pnfs_ld_write_done(wdata); 443 } 444 445 /* Called when last of bios associated with a bl_write_pagelist call finishes */ 446 static void bl_end_par_io_write(void *data, int num_se) 447 { 448 struct nfs_write_data *wdata = data; 449 450 if (unlikely(wdata->header->pnfs_error)) { 451 bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, 452 num_se); 453 } 454 455 wdata->task.tk_status = wdata->header->pnfs_error; 456 wdata->verf.committed = NFS_FILE_SYNC; 457 INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); 458 schedule_work(&wdata->task.u.tk_work); 459 } 460 461 /* FIXME STUB - mark intersection of layout and page as bad, so is not 462 * used again. 463 */ 464 static void mark_bad_read(void) 465 { 466 return; 467 } 468 469 /* 470 * map_block: map a requested I/0 block (isect) into an offset in the LVM 471 * block_device 472 */ 473 static void 474 map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) 475 { 476 dprintk("%s enter be=%p\n", __func__, be); 477 478 set_buffer_mapped(bh); 479 bh->b_bdev = be->be_mdev; 480 bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >> 481 (be->be_mdev->bd_inode->i_blkbits - SECTOR_SHIFT); 482 483 dprintk("%s isect %llu, bh->b_blocknr %ld, using bsize %Zd\n", 484 __func__, (unsigned long long)isect, (long)bh->b_blocknr, 485 bh->b_size); 486 return; 487 } 488 489 static void 490 bl_read_single_end_io(struct bio *bio, int error) 491 { 492 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 493 struct page *page = bvec->bv_page; 494 495 /* Only one page in bvec */ 496 unlock_page(page); 497 } 498 499 static int 500 bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be, 501 unsigned int offset, unsigned int len) 502 { 503 struct bio *bio; 504 struct page *shadow_page; 505 sector_t isect; 506 char *kaddr, *kshadow_addr; 507 int ret = 0; 508 509 dprintk("%s: offset %u len %u\n", __func__, offset, len); 510 511 shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 512 if (shadow_page == NULL) 513 return -ENOMEM; 514 515 bio = bio_alloc(GFP_NOIO, 1); 516 if (bio == NULL) 517 return -ENOMEM; 518 519 isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + 520 (offset / SECTOR_SIZE); 521 522 bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; 523 bio->bi_bdev = be->be_mdev; 524 bio->bi_end_io = bl_read_single_end_io; 525 526 lock_page(shadow_page); 527 if (bio_add_page(bio, shadow_page, 528 SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) { 529 unlock_page(shadow_page); 530 bio_put(bio); 531 return -EIO; 532 } 533 534 submit_bio(READ, bio); 535 wait_on_page_locked(shadow_page); 536 if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) { 537 ret = -EIO; 538 } else { 539 kaddr = kmap_atomic(page); 540 kshadow_addr = kmap_atomic(shadow_page); 541 memcpy(kaddr + offset, kshadow_addr + offset, len); 542 kunmap_atomic(kshadow_addr); 543 kunmap_atomic(kaddr); 544 } 545 __free_page(shadow_page); 546 bio_put(bio); 547 548 return ret; 549 } 550 551 static int 552 bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be, 553 unsigned int dirty_offset, unsigned int dirty_len, 554 bool full_page) 555 { 556 int ret = 0; 557 unsigned int start, end; 558 559 if (full_page) { 560 start = 0; 561 end = PAGE_CACHE_SIZE; 562 } else { 563 start = round_down(dirty_offset, SECTOR_SIZE); 564 end = round_up(dirty_offset + dirty_len, SECTOR_SIZE); 565 } 566 567 dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len); 568 if (!be) { 569 zero_user_segments(page, start, dirty_offset, 570 dirty_offset + dirty_len, end); 571 if (start == 0 && end == PAGE_CACHE_SIZE && 572 trylock_page(page)) { 573 SetPageUptodate(page); 574 unlock_page(page); 575 } 576 return ret; 577 } 578 579 if (start != dirty_offset) 580 ret = bl_do_readpage_sync(page, be, start, dirty_offset - start); 581 582 if (!ret && (dirty_offset + dirty_len < end)) 583 ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len, 584 end - dirty_offset - dirty_len); 585 586 return ret; 587 } 588 589 /* Given an unmapped page, zero it or read in page for COW, page is locked 590 * by caller. 591 */ 592 static int 593 init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) 594 { 595 struct buffer_head *bh = NULL; 596 int ret = 0; 597 sector_t isect; 598 599 dprintk("%s enter, %p\n", __func__, page); 600 BUG_ON(PageUptodate(page)); 601 if (!cow_read) { 602 zero_user_segment(page, 0, PAGE_SIZE); 603 SetPageUptodate(page); 604 goto cleanup; 605 } 606 607 bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0); 608 if (!bh) { 609 ret = -ENOMEM; 610 goto cleanup; 611 } 612 613 isect = (sector_t) page->index << PAGE_CACHE_SECTOR_SHIFT; 614 map_block(bh, isect, cow_read); 615 if (!bh_uptodate_or_lock(bh)) 616 ret = bh_submit_read(bh); 617 if (ret) 618 goto cleanup; 619 SetPageUptodate(page); 620 621 cleanup: 622 if (bh) 623 free_buffer_head(bh); 624 if (ret) { 625 /* Need to mark layout with bad read...should now 626 * just use nfs4 for reads and writes. 627 */ 628 mark_bad_read(); 629 } 630 return ret; 631 } 632 633 /* Find or create a zeroing page marked being writeback. 634 * Return ERR_PTR on error, NULL to indicate skip this page and page itself 635 * to indicate write out. 636 */ 637 static struct page * 638 bl_find_get_zeroing_page(struct inode *inode, pgoff_t index, 639 struct pnfs_block_extent *cow_read) 640 { 641 struct page *page; 642 int locked = 0; 643 page = find_get_page(inode->i_mapping, index); 644 if (page) 645 goto check_page; 646 647 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 648 if (unlikely(!page)) { 649 dprintk("%s oom\n", __func__); 650 return ERR_PTR(-ENOMEM); 651 } 652 locked = 1; 653 654 check_page: 655 /* PageDirty: Other will write this out 656 * PageWriteback: Other is writing this out 657 * PageUptodate: It was read before 658 */ 659 if (PageDirty(page) || PageWriteback(page)) { 660 print_page(page); 661 if (locked) 662 unlock_page(page); 663 page_cache_release(page); 664 return NULL; 665 } 666 667 if (!locked) { 668 lock_page(page); 669 locked = 1; 670 goto check_page; 671 } 672 if (!PageUptodate(page)) { 673 /* New page, readin or zero it */ 674 init_page_for_write(page, cow_read); 675 } 676 set_page_writeback(page); 677 unlock_page(page); 678 679 return page; 680 } 681 682 static enum pnfs_try_status 683 bl_write_pagelist(struct nfs_write_data *wdata, int sync) 684 { 685 struct nfs_pgio_header *header = wdata->header; 686 int i, ret, npg_zero, pg_index, last = 0; 687 struct bio *bio = NULL; 688 struct pnfs_block_extent *be = NULL, *cow_read = NULL; 689 sector_t isect, last_isect = 0, extent_length = 0; 690 struct parallel_io *par = NULL; 691 loff_t offset = wdata->args.offset; 692 size_t count = wdata->args.count; 693 unsigned int pg_offset, pg_len, saved_len; 694 struct page **pages = wdata->args.pages; 695 struct page *page; 696 pgoff_t index; 697 u64 temp; 698 int npg_per_block = 699 NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; 700 701 dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); 702 703 if (header->dreq != NULL && 704 (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) || 705 !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) { 706 dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); 707 goto out_mds; 708 } 709 /* At this point, wdata->pages is a (sequential) list of nfs_pages. 710 * We want to write each, and if there is an error set pnfs_error 711 * to have it redone using nfs. 712 */ 713 par = alloc_parallel(wdata); 714 if (!par) 715 goto out_mds; 716 par->pnfs_callback = bl_end_par_io_write; 717 /* At this point, have to be more careful with error handling */ 718 719 isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); 720 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read); 721 if (!be || !is_writable(be, isect)) { 722 dprintk("%s no matching extents!\n", __func__); 723 goto out_mds; 724 } 725 726 /* First page inside INVALID extent */ 727 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 728 if (likely(!bl_push_one_short_extent(be->be_inval))) 729 par->bse_count++; 730 else 731 goto out_mds; 732 temp = offset >> PAGE_CACHE_SHIFT; 733 npg_zero = do_div(temp, npg_per_block); 734 isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & 735 (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); 736 extent_length = be->be_length - (isect - be->be_f_offset); 737 738 fill_invalid_ext: 739 dprintk("%s need to zero %d pages\n", __func__, npg_zero); 740 for (;npg_zero > 0; npg_zero--) { 741 if (bl_is_sector_init(be->be_inval, isect)) { 742 dprintk("isect %llu already init\n", 743 (unsigned long long)isect); 744 goto next_page; 745 } 746 /* page ref released in bl_end_io_write_zero */ 747 index = isect >> PAGE_CACHE_SECTOR_SHIFT; 748 dprintk("%s zero %dth page: index %lu isect %llu\n", 749 __func__, npg_zero, index, 750 (unsigned long long)isect); 751 page = bl_find_get_zeroing_page(header->inode, index, 752 cow_read); 753 if (unlikely(IS_ERR(page))) { 754 header->pnfs_error = PTR_ERR(page); 755 goto out; 756 } else if (page == NULL) 757 goto next_page; 758 759 ret = bl_mark_sectors_init(be->be_inval, isect, 760 PAGE_CACHE_SECTORS); 761 if (unlikely(ret)) { 762 dprintk("%s bl_mark_sectors_init fail %d\n", 763 __func__, ret); 764 end_page_writeback(page); 765 page_cache_release(page); 766 header->pnfs_error = ret; 767 goto out; 768 } 769 if (likely(!bl_push_one_short_extent(be->be_inval))) 770 par->bse_count++; 771 else { 772 end_page_writeback(page); 773 page_cache_release(page); 774 header->pnfs_error = -ENOMEM; 775 goto out; 776 } 777 /* FIXME: This should be done in bi_end_io */ 778 mark_extents_written(BLK_LSEG2EXT(header->lseg), 779 page->index << PAGE_CACHE_SHIFT, 780 PAGE_CACHE_SIZE); 781 782 bio = bl_add_page_to_bio(bio, npg_zero, WRITE, 783 isect, page, be, 784 bl_end_io_write_zero, par); 785 if (IS_ERR(bio)) { 786 header->pnfs_error = PTR_ERR(bio); 787 bio = NULL; 788 goto out; 789 } 790 next_page: 791 isect += PAGE_CACHE_SECTORS; 792 extent_length -= PAGE_CACHE_SECTORS; 793 } 794 if (last) 795 goto write_done; 796 } 797 bio = bl_submit_bio(WRITE, bio); 798 799 /* Middle pages */ 800 pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; 801 for (i = pg_index; i < wdata->pages.npages; i++) { 802 if (!extent_length) { 803 /* We've used up the previous extent */ 804 bl_put_extent(be); 805 bl_put_extent(cow_read); 806 bio = bl_submit_bio(WRITE, bio); 807 /* Get the next one */ 808 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), 809 isect, &cow_read); 810 if (!be || !is_writable(be, isect)) { 811 header->pnfs_error = -EINVAL; 812 goto out; 813 } 814 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 815 if (likely(!bl_push_one_short_extent( 816 be->be_inval))) 817 par->bse_count++; 818 else { 819 header->pnfs_error = -ENOMEM; 820 goto out; 821 } 822 } 823 extent_length = be->be_length - 824 (isect - be->be_f_offset); 825 } 826 827 dprintk("%s offset %lld count %Zu\n", __func__, offset, count); 828 pg_offset = offset & ~PAGE_CACHE_MASK; 829 if (pg_offset + count > PAGE_CACHE_SIZE) 830 pg_len = PAGE_CACHE_SIZE - pg_offset; 831 else 832 pg_len = count; 833 834 saved_len = pg_len; 835 if (be->be_state == PNFS_BLOCK_INVALID_DATA && 836 !bl_is_sector_init(be->be_inval, isect)) { 837 ret = bl_read_partial_page_sync(pages[i], cow_read, 838 pg_offset, pg_len, true); 839 if (ret) { 840 dprintk("%s bl_read_partial_page_sync fail %d\n", 841 __func__, ret); 842 header->pnfs_error = ret; 843 goto out; 844 } 845 846 ret = bl_mark_sectors_init(be->be_inval, isect, 847 PAGE_CACHE_SECTORS); 848 if (unlikely(ret)) { 849 dprintk("%s bl_mark_sectors_init fail %d\n", 850 __func__, ret); 851 header->pnfs_error = ret; 852 goto out; 853 } 854 855 /* Expand to full page write */ 856 pg_offset = 0; 857 pg_len = PAGE_CACHE_SIZE; 858 } else if ((pg_offset & (SECTOR_SIZE - 1)) || 859 (pg_len & (SECTOR_SIZE - 1))){ 860 /* ahh, nasty case. We have to do sync full sector 861 * read-modify-write cycles. 862 */ 863 unsigned int saved_offset = pg_offset; 864 ret = bl_read_partial_page_sync(pages[i], be, pg_offset, 865 pg_len, false); 866 pg_offset = round_down(pg_offset, SECTOR_SIZE); 867 pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE) 868 - pg_offset; 869 } 870 871 872 bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, 873 isect, pages[i], be, 874 bl_end_io_write, par, 875 pg_offset, pg_len); 876 if (IS_ERR(bio)) { 877 header->pnfs_error = PTR_ERR(bio); 878 bio = NULL; 879 goto out; 880 } 881 offset += saved_len; 882 count -= saved_len; 883 isect += PAGE_CACHE_SECTORS; 884 last_isect = isect; 885 extent_length -= PAGE_CACHE_SECTORS; 886 } 887 888 /* Last page inside INVALID extent */ 889 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 890 bio = bl_submit_bio(WRITE, bio); 891 temp = last_isect >> PAGE_CACHE_SECTOR_SHIFT; 892 npg_zero = npg_per_block - do_div(temp, npg_per_block); 893 if (npg_zero < npg_per_block) { 894 last = 1; 895 goto fill_invalid_ext; 896 } 897 } 898 899 write_done: 900 wdata->res.count = wdata->args.count; 901 out: 902 bl_put_extent(be); 903 bl_put_extent(cow_read); 904 bl_submit_bio(WRITE, bio); 905 put_parallel(par); 906 return PNFS_ATTEMPTED; 907 out_mds: 908 bl_put_extent(be); 909 bl_put_extent(cow_read); 910 kfree(par); 911 return PNFS_NOT_ATTEMPTED; 912 } 913 914 /* FIXME - range ignored */ 915 static void 916 release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range) 917 { 918 int i; 919 struct pnfs_block_extent *be; 920 921 spin_lock(&bl->bl_ext_lock); 922 for (i = 0; i < EXTENT_LISTS; i++) { 923 while (!list_empty(&bl->bl_extents[i])) { 924 be = list_first_entry(&bl->bl_extents[i], 925 struct pnfs_block_extent, 926 be_node); 927 list_del(&be->be_node); 928 bl_put_extent(be); 929 } 930 } 931 spin_unlock(&bl->bl_ext_lock); 932 } 933 934 static void 935 release_inval_marks(struct pnfs_inval_markings *marks) 936 { 937 struct pnfs_inval_tracking *pos, *temp; 938 struct pnfs_block_short_extent *se, *stemp; 939 940 list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { 941 list_del(&pos->it_link); 942 kfree(pos); 943 } 944 945 list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) { 946 list_del(&se->bse_node); 947 kfree(se); 948 } 949 return; 950 } 951 952 static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo) 953 { 954 struct pnfs_block_layout *bl = BLK_LO2EXT(lo); 955 956 dprintk("%s enter\n", __func__); 957 release_extents(bl, NULL); 958 release_inval_marks(&bl->bl_inval); 959 kfree(bl); 960 } 961 962 static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode, 963 gfp_t gfp_flags) 964 { 965 struct pnfs_block_layout *bl; 966 967 dprintk("%s enter\n", __func__); 968 bl = kzalloc(sizeof(*bl), gfp_flags); 969 if (!bl) 970 return NULL; 971 spin_lock_init(&bl->bl_ext_lock); 972 INIT_LIST_HEAD(&bl->bl_extents[0]); 973 INIT_LIST_HEAD(&bl->bl_extents[1]); 974 INIT_LIST_HEAD(&bl->bl_commit); 975 INIT_LIST_HEAD(&bl->bl_committing); 976 bl->bl_count = 0; 977 bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> SECTOR_SHIFT; 978 BL_INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize); 979 return &bl->bl_layout; 980 } 981 982 static void bl_free_lseg(struct pnfs_layout_segment *lseg) 983 { 984 dprintk("%s enter\n", __func__); 985 kfree(lseg); 986 } 987 988 /* We pretty much ignore lseg, and store all data layout wide, so we 989 * can correctly merge. 990 */ 991 static struct pnfs_layout_segment *bl_alloc_lseg(struct pnfs_layout_hdr *lo, 992 struct nfs4_layoutget_res *lgr, 993 gfp_t gfp_flags) 994 { 995 struct pnfs_layout_segment *lseg; 996 int status; 997 998 dprintk("%s enter\n", __func__); 999 lseg = kzalloc(sizeof(*lseg), gfp_flags); 1000 if (!lseg) 1001 return ERR_PTR(-ENOMEM); 1002 status = nfs4_blk_process_layoutget(lo, lgr, gfp_flags); 1003 if (status) { 1004 /* We don't want to call the full-blown bl_free_lseg, 1005 * since on error extents were not touched. 1006 */ 1007 kfree(lseg); 1008 return ERR_PTR(status); 1009 } 1010 return lseg; 1011 } 1012 1013 static void 1014 bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, 1015 const struct nfs4_layoutcommit_args *arg) 1016 { 1017 dprintk("%s enter\n", __func__); 1018 encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg); 1019 } 1020 1021 static void 1022 bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) 1023 { 1024 struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout; 1025 1026 dprintk("%s enter\n", __func__); 1027 clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), &lcdata->args, lcdata->res.status); 1028 } 1029 1030 static void free_blk_mountid(struct block_mount_id *mid) 1031 { 1032 if (mid) { 1033 struct pnfs_block_dev *dev, *tmp; 1034 1035 /* No need to take bm_lock as we are last user freeing bm_devlist */ 1036 list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) { 1037 list_del(&dev->bm_node); 1038 bl_free_block_dev(dev); 1039 } 1040 kfree(mid); 1041 } 1042 } 1043 1044 /* This is mostly copied from the filelayout_get_device_info function. 1045 * It seems much of this should be at the generic pnfs level. 1046 */ 1047 static struct pnfs_block_dev * 1048 nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, 1049 struct nfs4_deviceid *d_id) 1050 { 1051 struct pnfs_device *dev; 1052 struct pnfs_block_dev *rv; 1053 u32 max_resp_sz; 1054 int max_pages; 1055 struct page **pages = NULL; 1056 int i, rc; 1057 1058 /* 1059 * Use the session max response size as the basis for setting 1060 * GETDEVICEINFO's maxcount 1061 */ 1062 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; 1063 max_pages = nfs_page_array_len(0, max_resp_sz); 1064 dprintk("%s max_resp_sz %u max_pages %d\n", 1065 __func__, max_resp_sz, max_pages); 1066 1067 dev = kmalloc(sizeof(*dev), GFP_NOFS); 1068 if (!dev) { 1069 dprintk("%s kmalloc failed\n", __func__); 1070 return ERR_PTR(-ENOMEM); 1071 } 1072 1073 pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); 1074 if (pages == NULL) { 1075 kfree(dev); 1076 return ERR_PTR(-ENOMEM); 1077 } 1078 for (i = 0; i < max_pages; i++) { 1079 pages[i] = alloc_page(GFP_NOFS); 1080 if (!pages[i]) { 1081 rv = ERR_PTR(-ENOMEM); 1082 goto out_free; 1083 } 1084 } 1085 1086 memcpy(&dev->dev_id, d_id, sizeof(*d_id)); 1087 dev->layout_type = LAYOUT_BLOCK_VOLUME; 1088 dev->pages = pages; 1089 dev->pgbase = 0; 1090 dev->pglen = PAGE_SIZE * max_pages; 1091 dev->mincount = 0; 1092 1093 dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); 1094 rc = nfs4_proc_getdeviceinfo(server, dev); 1095 dprintk("%s getdevice info returns %d\n", __func__, rc); 1096 if (rc) { 1097 rv = ERR_PTR(rc); 1098 goto out_free; 1099 } 1100 1101 rv = nfs4_blk_decode_device(server, dev); 1102 out_free: 1103 for (i = 0; i < max_pages; i++) 1104 __free_page(pages[i]); 1105 kfree(pages); 1106 kfree(dev); 1107 return rv; 1108 } 1109 1110 static int 1111 bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) 1112 { 1113 struct block_mount_id *b_mt_id = NULL; 1114 struct pnfs_devicelist *dlist = NULL; 1115 struct pnfs_block_dev *bdev; 1116 LIST_HEAD(block_disklist); 1117 int status, i; 1118 1119 dprintk("%s enter\n", __func__); 1120 1121 if (server->pnfs_blksize == 0) { 1122 dprintk("%s Server did not return blksize\n", __func__); 1123 return -EINVAL; 1124 } 1125 b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS); 1126 if (!b_mt_id) { 1127 status = -ENOMEM; 1128 goto out_error; 1129 } 1130 /* Initialize nfs4 block layout mount id */ 1131 spin_lock_init(&b_mt_id->bm_lock); 1132 INIT_LIST_HEAD(&b_mt_id->bm_devlist); 1133 1134 dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS); 1135 if (!dlist) { 1136 status = -ENOMEM; 1137 goto out_error; 1138 } 1139 dlist->eof = 0; 1140 while (!dlist->eof) { 1141 status = nfs4_proc_getdevicelist(server, fh, dlist); 1142 if (status) 1143 goto out_error; 1144 dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n", 1145 __func__, dlist->num_devs, dlist->eof); 1146 for (i = 0; i < dlist->num_devs; i++) { 1147 bdev = nfs4_blk_get_deviceinfo(server, fh, 1148 &dlist->dev_id[i]); 1149 if (IS_ERR(bdev)) { 1150 status = PTR_ERR(bdev); 1151 goto out_error; 1152 } 1153 spin_lock(&b_mt_id->bm_lock); 1154 list_add(&bdev->bm_node, &b_mt_id->bm_devlist); 1155 spin_unlock(&b_mt_id->bm_lock); 1156 } 1157 } 1158 dprintk("%s SUCCESS\n", __func__); 1159 server->pnfs_ld_data = b_mt_id; 1160 1161 out_return: 1162 kfree(dlist); 1163 return status; 1164 1165 out_error: 1166 free_blk_mountid(b_mt_id); 1167 goto out_return; 1168 } 1169 1170 static int 1171 bl_clear_layoutdriver(struct nfs_server *server) 1172 { 1173 struct block_mount_id *b_mt_id = server->pnfs_ld_data; 1174 1175 dprintk("%s enter\n", __func__); 1176 free_blk_mountid(b_mt_id); 1177 dprintk("%s RETURNS\n", __func__); 1178 return 0; 1179 } 1180 1181 static bool 1182 is_aligned_req(struct nfs_page *req, unsigned int alignment) 1183 { 1184 return IS_ALIGNED(req->wb_offset, alignment) && 1185 IS_ALIGNED(req->wb_bytes, alignment); 1186 } 1187 1188 static void 1189 bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 1190 { 1191 if (pgio->pg_dreq != NULL && 1192 !is_aligned_req(req, SECTOR_SIZE)) 1193 nfs_pageio_reset_read_mds(pgio); 1194 else 1195 pnfs_generic_pg_init_read(pgio, req); 1196 } 1197 1198 static bool 1199 bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 1200 struct nfs_page *req) 1201 { 1202 if (pgio->pg_dreq != NULL && 1203 !is_aligned_req(req, SECTOR_SIZE)) 1204 return false; 1205 1206 return pnfs_generic_pg_test(pgio, prev, req); 1207 } 1208 1209 /* 1210 * Return the number of contiguous bytes for a given inode 1211 * starting at page frame idx. 1212 */ 1213 static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) 1214 { 1215 struct address_space *mapping = inode->i_mapping; 1216 pgoff_t end; 1217 1218 /* Optimize common case that writes from 0 to end of file */ 1219 end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); 1220 if (end != NFS_I(inode)->npages) { 1221 rcu_read_lock(); 1222 end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); 1223 rcu_read_unlock(); 1224 } 1225 1226 if (!end) 1227 return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT); 1228 else 1229 return (end - idx) << PAGE_CACHE_SHIFT; 1230 } 1231 1232 static void 1233 bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 1234 { 1235 if (pgio->pg_dreq != NULL && 1236 !is_aligned_req(req, PAGE_CACHE_SIZE)) { 1237 nfs_pageio_reset_write_mds(pgio); 1238 } else { 1239 u64 wb_size; 1240 if (pgio->pg_dreq == NULL) 1241 wb_size = pnfs_num_cont_bytes(pgio->pg_inode, 1242 req->wb_index); 1243 else 1244 wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); 1245 1246 pnfs_generic_pg_init_write(pgio, req, wb_size); 1247 } 1248 } 1249 1250 static bool 1251 bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 1252 struct nfs_page *req) 1253 { 1254 if (pgio->pg_dreq != NULL && 1255 !is_aligned_req(req, PAGE_CACHE_SIZE)) 1256 return false; 1257 1258 return pnfs_generic_pg_test(pgio, prev, req); 1259 } 1260 1261 static const struct nfs_pageio_ops bl_pg_read_ops = { 1262 .pg_init = bl_pg_init_read, 1263 .pg_test = bl_pg_test_read, 1264 .pg_doio = pnfs_generic_pg_readpages, 1265 }; 1266 1267 static const struct nfs_pageio_ops bl_pg_write_ops = { 1268 .pg_init = bl_pg_init_write, 1269 .pg_test = bl_pg_test_write, 1270 .pg_doio = pnfs_generic_pg_writepages, 1271 }; 1272 1273 static struct pnfs_layoutdriver_type blocklayout_type = { 1274 .id = LAYOUT_BLOCK_VOLUME, 1275 .name = "LAYOUT_BLOCK_VOLUME", 1276 .read_pagelist = bl_read_pagelist, 1277 .write_pagelist = bl_write_pagelist, 1278 .alloc_layout_hdr = bl_alloc_layout_hdr, 1279 .free_layout_hdr = bl_free_layout_hdr, 1280 .alloc_lseg = bl_alloc_lseg, 1281 .free_lseg = bl_free_lseg, 1282 .encode_layoutcommit = bl_encode_layoutcommit, 1283 .cleanup_layoutcommit = bl_cleanup_layoutcommit, 1284 .set_layoutdriver = bl_set_layoutdriver, 1285 .clear_layoutdriver = bl_clear_layoutdriver, 1286 .pg_read_ops = &bl_pg_read_ops, 1287 .pg_write_ops = &bl_pg_write_ops, 1288 }; 1289 1290 static const struct rpc_pipe_ops bl_upcall_ops = { 1291 .upcall = rpc_pipe_generic_upcall, 1292 .downcall = bl_pipe_downcall, 1293 .destroy_msg = bl_pipe_destroy_msg, 1294 }; 1295 1296 static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, 1297 struct rpc_pipe *pipe) 1298 { 1299 struct dentry *dir, *dentry; 1300 1301 dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); 1302 if (dir == NULL) 1303 return ERR_PTR(-ENOENT); 1304 dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); 1305 dput(dir); 1306 return dentry; 1307 } 1308 1309 static void nfs4blocklayout_unregister_sb(struct super_block *sb, 1310 struct rpc_pipe *pipe) 1311 { 1312 if (pipe->dentry) 1313 rpc_unlink(pipe->dentry); 1314 } 1315 1316 static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, 1317 void *ptr) 1318 { 1319 struct super_block *sb = ptr; 1320 struct net *net = sb->s_fs_info; 1321 struct nfs_net *nn = net_generic(net, nfs_net_id); 1322 struct dentry *dentry; 1323 int ret = 0; 1324 1325 if (!try_module_get(THIS_MODULE)) 1326 return 0; 1327 1328 if (nn->bl_device_pipe == NULL) { 1329 module_put(THIS_MODULE); 1330 return 0; 1331 } 1332 1333 switch (event) { 1334 case RPC_PIPEFS_MOUNT: 1335 dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); 1336 if (IS_ERR(dentry)) { 1337 ret = PTR_ERR(dentry); 1338 break; 1339 } 1340 nn->bl_device_pipe->dentry = dentry; 1341 break; 1342 case RPC_PIPEFS_UMOUNT: 1343 if (nn->bl_device_pipe->dentry) 1344 nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); 1345 break; 1346 default: 1347 ret = -ENOTSUPP; 1348 break; 1349 } 1350 module_put(THIS_MODULE); 1351 return ret; 1352 } 1353 1354 static struct notifier_block nfs4blocklayout_block = { 1355 .notifier_call = rpc_pipefs_event, 1356 }; 1357 1358 static struct dentry *nfs4blocklayout_register_net(struct net *net, 1359 struct rpc_pipe *pipe) 1360 { 1361 struct super_block *pipefs_sb; 1362 struct dentry *dentry; 1363 1364 pipefs_sb = rpc_get_sb_net(net); 1365 if (!pipefs_sb) 1366 return NULL; 1367 dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); 1368 rpc_put_sb_net(net); 1369 return dentry; 1370 } 1371 1372 static void nfs4blocklayout_unregister_net(struct net *net, 1373 struct rpc_pipe *pipe) 1374 { 1375 struct super_block *pipefs_sb; 1376 1377 pipefs_sb = rpc_get_sb_net(net); 1378 if (pipefs_sb) { 1379 nfs4blocklayout_unregister_sb(pipefs_sb, pipe); 1380 rpc_put_sb_net(net); 1381 } 1382 } 1383 1384 static int nfs4blocklayout_net_init(struct net *net) 1385 { 1386 struct nfs_net *nn = net_generic(net, nfs_net_id); 1387 struct dentry *dentry; 1388 1389 init_waitqueue_head(&nn->bl_wq); 1390 nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); 1391 if (IS_ERR(nn->bl_device_pipe)) 1392 return PTR_ERR(nn->bl_device_pipe); 1393 dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); 1394 if (IS_ERR(dentry)) { 1395 rpc_destroy_pipe_data(nn->bl_device_pipe); 1396 return PTR_ERR(dentry); 1397 } 1398 nn->bl_device_pipe->dentry = dentry; 1399 return 0; 1400 } 1401 1402 static void nfs4blocklayout_net_exit(struct net *net) 1403 { 1404 struct nfs_net *nn = net_generic(net, nfs_net_id); 1405 1406 nfs4blocklayout_unregister_net(net, nn->bl_device_pipe); 1407 rpc_destroy_pipe_data(nn->bl_device_pipe); 1408 nn->bl_device_pipe = NULL; 1409 } 1410 1411 static struct pernet_operations nfs4blocklayout_net_ops = { 1412 .init = nfs4blocklayout_net_init, 1413 .exit = nfs4blocklayout_net_exit, 1414 }; 1415 1416 static int __init nfs4blocklayout_init(void) 1417 { 1418 int ret; 1419 1420 dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); 1421 1422 ret = pnfs_register_layoutdriver(&blocklayout_type); 1423 if (ret) 1424 goto out; 1425 1426 ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); 1427 if (ret) 1428 goto out_remove; 1429 ret = register_pernet_subsys(&nfs4blocklayout_net_ops); 1430 if (ret) 1431 goto out_notifier; 1432 out: 1433 return ret; 1434 1435 out_notifier: 1436 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); 1437 out_remove: 1438 pnfs_unregister_layoutdriver(&blocklayout_type); 1439 return ret; 1440 } 1441 1442 static void __exit nfs4blocklayout_exit(void) 1443 { 1444 dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", 1445 __func__); 1446 1447 rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); 1448 unregister_pernet_subsys(&nfs4blocklayout_net_ops); 1449 pnfs_unregister_layoutdriver(&blocklayout_type); 1450 } 1451 1452 MODULE_ALIAS("nfs-layouttype4-3"); 1453 1454 module_init(nfs4blocklayout_init); 1455 module_exit(nfs4blocklayout_exit); 1456