/*
 * linux/fs/nfs/write.c
 *
 * Write file data over NFS.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/migrate.h>

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>

#include <asm/uaccess.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "nfs4_fs.h"
#include "fscache.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

#define MIN_POOL_WRITE		(32)
#define MIN_POOL_COMMIT		(4)

/*
 * Local function declarations
 */
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
				  struct inode *inode, int ioflags);
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_write_partial_ops;
static const struct rpc_call_ops nfs_write_full_ops;
static const struct rpc_call_ops nfs_commit_ops;

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;

struct nfs_write_data *nfs_commitdata_alloc(void)
{
	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
	}
	return p;
}

void nfs_commit_free(struct nfs_write_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_commit_mempool);
}

struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
{
	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
			if (!p->pagevec) {
				mempool_free(p, nfs_wdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

void nfs_writedata_free(struct nfs_write_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_wdata_mempool);
}

static void nfs_writedata_release(struct nfs_write_data *wdata)
{
	put_nfs_open_context(wdata->args.context);
	nfs_writedata_free(wdata);
}

static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
	ctx->error = error;
	smp_wmb();
	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}

static struct nfs_page *nfs_page_find_request_locked(struct page *page)
{
	struct nfs_page *req = NULL;

	if (PagePrivate(page)) {
		req = (struct nfs_page *)page_private(page);
		if (req != NULL)
			kref_get(&req->wb_kref);
	}
	return req;
}

static struct nfs_page *nfs_page_find_request(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req = NULL;

	spin_lock(&inode->i_lock);
	req = nfs_page_find_request_locked(page);
	spin_unlock(&inode->i_lock);
	return req;
}

/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
	struct inode *inode = page->mapping->host;
	loff_t end, i_size;
	pgoff_t end_index;

	spin_lock(&inode->i_lock);
	i_size = i_size_read(inode);
	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
	if (i_size > 0 && page->index < end_index)
		goto out;
	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
	if (i_size >= end)
		goto out;
	i_size_write(inode, end);
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
	spin_unlock(&inode->i_lock);
}

/* A writeback failed: mark the page as bad, and invalidate the page cache */
static void nfs_set_pageerror(struct page *page)
{
	SetPageError(page);
	nfs_zap_mapping(page->mapping->host, page->mapping);
}

/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
{
	if (PageUptodate(page))
		return;
	if (base != 0)
		return;
	if (count != nfs_page_length(page))
		return;
	SetPageUptodate(page);
}

static int wb_priority(struct writeback_control *wbc)
{
	if (wbc->for_reclaim)
		return FLUSH_HIGHPRI | FLUSH_STABLE;
	if (wbc->for_kupdate || wbc->for_background)
		return FLUSH_LOWPRI;
	return 0;
}

/*
 * NFS congestion control
 */

int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))

static int nfs_set_page_writeback(struct page *page)
{
	int ret = test_set_page_writeback(page);

	if (!ret) {
		struct inode *inode = page->mapping->host;
		struct nfs_server *nfss = NFS_SERVER(inode);

		page_cache_get(page);
		if (atomic_long_inc_return(&nfss->writeback) >
				NFS_CONGESTION_ON_THRESH) {
			set_bdi_congested(&nfss->backing_dev_info,
						BLK_RW_ASYNC);
		}
	}
	return ret;
}

static void nfs_end_page_writeback(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_server *nfss = NFS_SERVER(inode);

	end_page_writeback(page);
	page_cache_release(page);
	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}

static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int ret;

	spin_lock(&inode->i_lock);
	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			break;
		if (nfs_set_page_tag_locked(req))
			break;
		/* Note: If we hold the page lock, as is the case in nfs_writepage,
		 *	 then the call to nfs_set_page_tag_locked() will always
		 *	 succeed provided that someone hasn't already marked the
		 *	 request as dirty (in which case we don't care).
		 */
		spin_unlock(&inode->i_lock);
		if (!nonblock)
			ret = nfs_wait_on_request(req);
		else
			ret = -EAGAIN;
		nfs_release_request(req);
		if (ret != 0)
			return ERR_PTR(ret);
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
	return req;
}

/*
 * Find an associated nfs write request, and prepare to flush it out
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
				struct page *page, bool nonblock)
{
	struct nfs_page *req;
	int ret = 0;

	req = nfs_find_and_lock_request(page, nonblock);
	if (!req)
		goto out;
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	ret = nfs_set_page_writeback(page);
	BUG_ON(ret != 0);
	BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));

	if (!nfs_pageio_add_request(pgio, req)) {
		nfs_redirty_request(req);
		ret = pgio->pg_error;
	}
out:
	return ret;
}

static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
{
	struct inode *inode = page->mapping->host;
	int ret;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);

	nfs_pageio_cond_complete(pgio, page->index);
	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
	if (ret == -EAGAIN) {
		redirty_page_for_writepage(wbc, page);
		ret = 0;
	}
	return ret;
}

/*
 * Write an mmapped page to the server.
 */
static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	struct nfs_pageio_descriptor pgio;
	int err;

	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
	err = nfs_do_writepage(page, wbc, &pgio);
	nfs_pageio_complete(&pgio);
	if (err < 0)
		return err;
	if (pgio.pg_error < 0)
		return pgio.pg_error;
	return 0;
}

int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_writepage_locked(page, wbc);
	unlock_page(page);
	return ret;
}

static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
{
	int ret;

	ret = nfs_do_writepage(page, wbc, data);
	unlock_page(page);
	return ret;
}

int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	unsigned long *bitlock = &NFS_I(inode)->flags;
	struct nfs_pageio_descriptor pgio;
	int err;

	/* Stop dirtying of new pages while we sync */
	err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
			nfs_wait_bit_killable, TASK_KILLABLE);
	if (err)
		goto out_err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
	nfs_pageio_complete(&pgio);

	clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
	smp_mb__after_clear_bit();
	wake_up_bit(bitlock, NFS_INO_FLUSHING);

	if (err < 0)
		goto out_err;
	err = pgio.pg_error;
	if (err < 0)
		goto out_err;
	return 0;
out_err:
	return err;
}

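/*
 * Request tracking (a summary of the code below): dirty nfs_page
 * requests live in the per-inode radix tree (nfsi->nfs_page_tree),
 * indexed by page index, and are also reachable from the page itself
 * via page_private().  The NFS_PAGE_TAG_LOCKED and NFS_PAGE_TAG_COMMIT
 * tags mark requests that are locked and requests awaiting a COMMIT,
 * respectively.
 */
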
/*
 * Insert a write request into an inode
 */
static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int error;

	error = radix_tree_preload(GFP_NOFS);
	if (error != 0)
		goto out;

	/* Lock the request! */
	nfs_lock_request_dontget(req);

	spin_lock(&inode->i_lock);
	error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
	BUG_ON(error);
	if (!nfsi->npages) {
		igrab(inode);
		if (nfs_have_delegation(inode, FMODE_WRITE))
			nfsi->change_attr++;
	}
	SetPagePrivate(req->wb_page);
	set_page_private(req->wb_page, (unsigned long)req);
	nfsi->npages++;
	kref_get(&req->wb_kref);
	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
				NFS_PAGE_TAG_LOCKED);
	spin_unlock(&inode->i_lock);
	radix_tree_preload_end();
out:
	return error;
}

/*
 * Remove a write request from an inode
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	BUG_ON (!NFS_WBACK_BUSY(req));

	spin_lock(&inode->i_lock);
	set_page_private(req->wb_page, 0);
	ClearPagePrivate(req->wb_page);
	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
	nfsi->npages--;
	if (!nfsi->npages) {
		spin_unlock(&inode->i_lock);
		iput(inode);
	} else
		spin_unlock(&inode->i_lock);
	nfs_clear_request(req);
	nfs_release_request(req);
}

static void
nfs_mark_request_dirty(struct nfs_page *req)
{
	__set_page_dirty_nobuffers(req->wb_page);
	__mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC);
}

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/*
 * Add a request to the inode's commit list.
 */
static void
nfs_mark_request_commit(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	spin_lock(&inode->i_lock);
	set_bit(PG_CLEAN, &(req)->wb_flags);
	radix_tree_tag_set(&nfsi->nfs_page_tree,
			req->wb_index,
			NFS_PAGE_TAG_COMMIT);
	nfsi->ncommit++;
	spin_unlock(&inode->i_lock);
	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}

static int
nfs_clear_request_commit(struct nfs_page *req)
{
	struct page *page = req->wb_page;

	if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
		dec_zone_page_state(page, NR_UNSTABLE_NFS);
		dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
		return 1;
	}
	return 0;
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	return data->verf.committed != NFS_FILE_SYNC;
}

static inline
int nfs_reschedule_unstable_write(struct nfs_page *req)
{
	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
		nfs_mark_request_commit(req);
		return 1;
	}
	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
		nfs_mark_request_dirty(req);
		return 1;
	}
	return 0;
}
#else
static inline void
nfs_mark_request_commit(struct nfs_page *req)
{
}

static inline int
nfs_clear_request_commit(struct nfs_page *req)
{
	return 0;
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	return 0;
}

static inline
int nfs_reschedule_unstable_write(struct nfs_page *req)
{
	return 0;
}
#endif

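/*
 * On NFSv3/v4, data written with an UNSTABLE write is not guaranteed to
 * be on stable storage until the server acknowledges a COMMIT, so such
 * requests stay on the inode (tagged NFS_PAGE_TAG_COMMIT, counted in
 * nfsi->ncommit and accounted as NR_UNSTABLE_NFS) until the commit code
 * further down has run.
 */
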
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int
nfs_need_commit(struct nfs_inode *nfsi)
{
	return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT);
}

/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: destination list
 * @idx_start: lower bound of page->index to scan.
 * @npages: idx_start + npages sets the upper bound to scan.
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
static int
nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int ret;

	if (!nfs_need_commit(nfsi))
		return 0;

	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
	if (ret > 0)
		nfsi->ncommit -= ret;
	if (nfs_need_commit(NFS_I(inode)))
		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return ret;
}
#else
static inline int nfs_need_commit(struct nfs_inode *nfsi)
{
	return 0;
}

static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
{
	return 0;
}
#endif

/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 */
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	if (!PagePrivate(page))
		return NULL;

	end = offset + bytes;
	spin_lock(&inode->i_lock);

	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			goto out_unlock;

		rqend = req->wb_offset + req->wb_bytes;
		/*
		 * Tell the caller to flush out the request if
		 * the offsets are non-contiguous.
		 * Note: nfs_flush_incompatible() will already
		 * have flushed out requests having wrong owners.
		 */
		if (offset > rqend
		    || end < req->wb_offset)
			goto out_flushme;

		if (nfs_set_page_tag_locked(req))
			break;

		/* The request is locked, so wait and then retry */
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error != 0)
			goto out_err;
		spin_lock(&inode->i_lock);
	}

	if (nfs_clear_request_commit(req) &&
	    radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
				 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL)
		NFS_I(inode)->ncommit--;

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
out_unlock:
	spin_unlock(&inode->i_lock);
	return req;
out_flushme:
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
	error = nfs_wb_page(inode, page);
out_err:
	return ERR_PTR(error);
}

/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
		struct page *page, unsigned int offset, unsigned int bytes)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int error;

	req = nfs_try_to_update_request(inode, page, offset, bytes);
	if (req != NULL)
		goto out;
	req = nfs_create_request(ctx, inode, page, offset, bytes);
	if (IS_ERR(req))
		goto out;
	error = nfs_inode_add_request(inode, req);
	if (error != 0) {
		nfs_release_request(req);
		req = ERR_PTR(error);
	}
out:
	return req;
}

static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page *req;

	req = nfs_setup_write_request(ctx, page, offset, count);
	if (IS_ERR(req))
		return PTR_ERR(req);
	nfs_mark_request_dirty(req);
	/* Update file length */
	nfs_grow_file(page, offset, count);
	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
	nfs_mark_request_dirty(req);
	nfs_clear_page_tag_locked(req);
	return 0;
}

int nfs_flush_incompatible(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct nfs_page *req;
	int do_flush, status;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	do {
		req = nfs_page_find_request(page);
		if (req == NULL)
			return 0;
		do_flush = req->wb_page != page || req->wb_context != ctx ||
			req->wb_lock_context->lockowner != current->files ||
			req->wb_lock_context->pid != current->tgid;
		nfs_release_request(req);
		if (!do_flush)
			return 0;
		status = nfs_wb_page(page->mapping->host, page);
	} while (status == 0);
	return status;
}

/*
 * If the page cache is marked as unsafe or invalid, then we can't rely on
 * the PageUptodate() flag. In this case, we will need to turn off
 * write optimisations that depend on the page contents being correct.
 */
static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
	return PageUptodate(page) &&
		!(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
}

/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int nfs_updatepage(struct file *file, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct inode *inode = page->mapping->host;
	int status = 0;

	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);

	dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n",
		file->f_path.dentry->d_parent->d_name.name,
		file->f_path.dentry->d_name.name, count,
		(long long)(page_offset(page) + offset));

	/* If we're not using byte range locks, and we know the page
	 * is up to date, it may be more efficient to extend the write
	 * to cover the entire page in order to avoid fragmentation
	 * inefficiencies.
	 */
	if (nfs_write_pageuptodate(page, inode) &&
			inode->i_flock == NULL &&
			!(file->f_flags & O_DSYNC)) {
		count = max(count + offset, nfs_page_length(page));
		offset = 0;
	}

	status = nfs_writepage_setup(ctx, page, offset, count);
	if (status < 0)
		nfs_set_pageerror(page);

	dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
			status, (long long)i_size_read(inode));
	return status;
}

static void nfs_writepage_release(struct nfs_page *req)
{
	struct page *page = req->wb_page;

	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req))
		nfs_inode_remove_request(req);
	nfs_clear_page_tag_locked(req);
	nfs_end_page_writeback(page);
}

static int flush_task_priority(int how)
{
	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
		case FLUSH_HIGHPRI:
			return RPC_PRIORITY_HIGH;
		case FLUSH_LOWPRI:
			return RPC_PRIORITY_LOW;
	}
	return RPC_PRIORITY_NORMAL;
}

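/*
 * Stability of the write RPCs set up below: args.stable defaults to
 * NFS_UNSTABLE; a FLUSH_STABLE flush upgrades it to NFS_DATA_SYNC, or
 * to NFS_FILE_SYNC when the inode has nothing waiting for a COMMIT,
 * in which case no separate commit is needed afterwards.
 */
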
/*
 * Set up the argument/result storage required for the RPC call.
 */
static int nfs_write_rpcsetup(struct nfs_page *req,
		struct nfs_write_data *data,
		const struct rpc_call_ops *call_ops,
		unsigned int count, unsigned int offset,
		int how)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	int priority = flush_task_priority(how);
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = req->wb_context->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.rpc_client = NFS_CLIENT(inode),
		.task = &data->task,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC,
		.priority = priority,
	};
	int ret = 0;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	data->req = req;
	data->inode = inode = req->wb_context->path.dentry->d_inode;
	data->cred = msg.rpc_cred;

	data->args.fh = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pagevec;
	data->args.count = count;
	data->args.context = get_nfs_open_context(req->wb_context);
	data->args.lock_context = req->wb_lock_context;
	data->args.stable = NFS_UNSTABLE;
	if (how & FLUSH_STABLE) {
		data->args.stable = NFS_DATA_SYNC;
		if (!nfs_need_commit(NFS_I(inode)))
			data->args.stable = NFS_FILE_SYNC;
	}

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.verf = &data->verf;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->write_setup(data, &msg);

	dprintk("NFS: %5u initiated write call "
		"(req %s/%lld, %u bytes @ offset %llu)\n",
		data->task.tk_pid,
		inode->i_sb->s_id,
		(long long)NFS_FILEID(inode),
		count,
		(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task)) {
		ret = PTR_ERR(task);
		goto out;
	}
	if (how & FLUSH_SYNC) {
		ret = rpc_wait_for_completion_task(task);
		if (ret == 0)
			ret = task->tk_status;
	}
	rpc_put_task(task);
out:
	return ret;
}

/* If a nfs_flush_* function fails, it should remove reqs from @head and
 * call this on each, which will prepare them to be retried on next
 * writeback using standard nfs.
 */
static void nfs_redirty_request(struct nfs_page *req)
{
	struct page *page = req->wb_page;

	nfs_mark_request_dirty(req);
	nfs_clear_page_tag_locked(req);
	nfs_end_page_writeback(page);
}

/*
 * Generate multiple small requests to write out a single
 * contiguous dirty area on one page.
 */
static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
{
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct nfs_write_data *data;
	size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
	unsigned int offset;
	int requests = 0;
	int ret = 0;
	LIST_HEAD(list);

	nfs_list_remove_request(req);

	nbytes = count;
	do {
		size_t len = min(nbytes, wsize);

		data = nfs_writedata_alloc(1);
		if (!data)
			goto out_bad;
		list_add(&data->pages, &list);
		requests++;
		nbytes -= len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);

	ClearPageError(page);
	offset = 0;
	nbytes = count;
	do {
		int ret2;

		data = list_entry(list.next, struct nfs_write_data, pages);
		list_del_init(&data->pages);

		data->pagevec[0] = page;

		if (nbytes < wsize)
			wsize = nbytes;
		ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
				wsize, offset, how);
		if (ret == 0)
			ret = ret2;
		offset += wsize;
		nbytes -= wsize;
	} while (nbytes != 0);

	return ret;

out_bad:
	while (!list_empty(&list)) {
		data = list_entry(list.next, struct nfs_write_data, pages);
		list_del(&data->pages);
		nfs_writedata_release(data);
	}
	nfs_redirty_request(req);
	return -ENOMEM;
}

/*
 * Create an RPC task for the given write request and kick it.
 * The page must have been locked by the caller.
 *
 * It may happen that the page we're passed is not marked dirty.
 * This is the case if nfs_updatepage detects a conflicting request
 * that has been written but not committed.
 */
static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
{
	struct nfs_page *req;
	struct page **pages;
	struct nfs_write_data *data;

	data = nfs_writedata_alloc(npages);
	if (!data)
		goto out_bad;

	pages = data->pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		*pages++ = req->wb_page;
	}
	req = nfs_list_entry(data->pages.next);

	/* Set up the argument struct */
	return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
out_bad:
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_redirty_request(req);
	}
	return -ENOMEM;
}

static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
				  struct inode *inode, int ioflags)
{
	size_t wsize = NFS_SERVER(inode)->wsize;

	if (wsize < PAGE_CACHE_SIZE)
		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
	else
		nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
}

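/*
 * Completion handling mirrors the two flush strategies above: requests
 * issued by nfs_flush_multi() complete through the "partial" call_ops
 * below (several RPCs make up one page), while nfs_flush_one() requests
 * complete through the "full" call_ops further down.
 */
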
/*
 * Handle a write reply that flushed part of a page.
 */
static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	dprintk("NFS: %5u write(%s/%lld %d@%lld)",
		task->tk_pid,
		data->req->wb_context->path.dentry->d_inode->i_sb->s_id,
		(long long)
		NFS_FILEID(data->req->wb_context->path.dentry->d_inode),
		data->req->wb_bytes, (long long)req_offset(data->req));

	nfs_writeback_done(task, data);
}

static void nfs_writeback_release_partial(void *calldata)
{
	struct nfs_write_data *data = calldata;
	struct nfs_page *req = data->req;
	struct page *page = req->wb_page;
	int status = data->task.tk_status;

	if (status < 0) {
		nfs_set_pageerror(page);
		nfs_context_set_write_error(req->wb_context, status);
		dprintk(", error = %d\n", status);
		goto out;
	}

	if (nfs_write_need_commit(data)) {
		struct inode *inode = page->mapping->host;

		spin_lock(&inode->i_lock);
		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
			/* Do nothing we need to resend the writes */
		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
			dprintk(" defer commit\n");
		} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
			set_bit(PG_NEED_RESCHED, &req->wb_flags);
			clear_bit(PG_NEED_COMMIT, &req->wb_flags);
			dprintk(" server reboot detected\n");
		}
		spin_unlock(&inode->i_lock);
	} else
		dprintk(" OK\n");

out:
	if (atomic_dec_and_test(&req->wb_complete))
		nfs_writepage_release(req);
	nfs_writedata_release(calldata);
}

#if defined(CONFIG_NFS_V4_1)
void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	if (nfs4_setup_sequence(NFS_SERVER(data->inode),
				&data->args.seq_args,
				&data->res.seq_res, 1, task))
		return;
	rpc_call_start(task);
}
#endif /* CONFIG_NFS_V4_1 */

static const struct rpc_call_ops nfs_write_partial_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_write_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_writeback_done_partial,
	.rpc_release = nfs_writeback_release_partial,
};

/*
 * Handle a write reply that flushes a whole page.
 *
 * FIXME: There is an inherent race with invalidate_inode_pages and
 *	  writebacks since the page->count is kept > 1 for as long
 *	  as the page has a write request pending.
 */
static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	nfs_writeback_done(task, data);
}

static void nfs_writeback_release_full(void *calldata)
{
	struct nfs_write_data *data = calldata;
	int status = data->task.tk_status;

	/* Update attributes as result of writeback. */
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);
		struct page *page = req->wb_page;

		nfs_list_remove_request(req);

		dprintk("NFS: %5u write (%s/%lld %d@%lld)",
			data->task.tk_pid,
			req->wb_context->path.dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));

		if (status < 0) {
			nfs_set_pageerror(page);
			nfs_context_set_write_error(req->wb_context, status);
			dprintk(", error = %d\n", status);
			goto remove_request;
		}

		if (nfs_write_need_commit(data)) {
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
			nfs_mark_request_commit(req);
			dprintk(" marked for commit\n");
			goto next;
		}
		dprintk(" OK\n");
remove_request:
		nfs_inode_remove_request(req);
	next:
		nfs_clear_page_tag_locked(req);
		nfs_end_page_writeback(page);
	}
	nfs_writedata_release(calldata);
}

static const struct rpc_call_ops nfs_write_full_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_write_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_writeback_done_full,
	.rpc_release = nfs_writeback_release_full,
};


/*
 * This function is called when the WRITE call is complete.
 */
int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
	struct nfs_writeargs *argp = &data->args;
	struct nfs_writeres *resp = &data->res;
	struct nfs_server *server = NFS_SERVER(data->inode);
	int status;

	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/*
	 * ->write_done will attempt to use post-op attributes to detect
	 * conflicting writes by other clients. A strict interpretation
	 * of close-to-open would allow us to continue caching even if
	 * another writer had changed the file, but some applications
	 * depend on tighter cache coherency when writing.
	 */
	status = NFS_PROTO(data->inode)->write_done(task, data);
	if (status != 0)
		return status;
	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
		/* We tried a write call, but the server did not
		 * commit data to stable storage even though we
		 * requested it.
		 * Note: There is a known bug in Tru64 < 5.0 in which
		 *	 the server reports NFS_DATA_SYNC, but performs
		 *	 NFS_FILE_SYNC. We therefore implement this checking
		 *	 as a dprintk() in order to avoid filling syslog.
		 */
		static unsigned long complain;

		if (time_before(complain, jiffies)) {
			dprintk("NFS: faulty NFS server %s:"
				" (committed = %d) != (stable = %d)\n",
				server->nfs_client->cl_hostname,
				resp->verf->committed, argp->stable);
			complain = jiffies + 300 * HZ;
		}
	}
#endif
	/* Is this a short write? */
	if (task->tk_status >= 0 && resp->count < argp->count) {
		static unsigned long complain;

		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);

		/* Has the server at least made some progress? */
		if (resp->count != 0) {
			/* Was this an NFSv2 write or an NFSv3 stable write? */
			if (resp->verf->committed != NFS_UNSTABLE) {
				/* Resend from where the server left off */
				argp->offset += resp->count;
				argp->pgbase += resp->count;
				argp->count -= resp->count;
			} else {
				/* Resend as a stable write in order to avoid
				 * headaches in the case of a server crash.
				 */
				argp->stable = NFS_FILE_SYNC;
			}
			nfs_restart_rpc(task, server->nfs_client);
			return -EAGAIN;
		}
		if (time_before(complain, jiffies)) {
			printk(KERN_WARNING
				"NFS: Server wrote zero bytes, expected %u.\n",
				argp->count);
			complain = jiffies + 300 * HZ;
		}
		/* Can't do anything about it except throw an error. */
		task->tk_status = -EIO;
	}
	return 0;
}


#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
{
	if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
		return 1;
	if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags,
				NFS_INO_COMMIT, nfs_wait_bit_killable,
				TASK_KILLABLE))
		return 1;
	return 0;
}

static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
{
	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
	smp_mb__after_clear_bit();
	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
}


static void nfs_commitdata_release(void *data)
{
	struct nfs_write_data *wdata = data;

	put_nfs_open_context(wdata->args.context);
	nfs_commit_free(wdata);
}

/*
 * Set up the argument/result storage required for the RPC call.
 */
static int nfs_commit_rpcsetup(struct list_head *head,
		struct nfs_write_data *data,
		int how)
{
	struct nfs_page *first = nfs_list_entry(head->next);
	struct inode *inode = first->wb_context->path.dentry->d_inode;
	int priority = flush_task_priority(how);
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = first->wb_context->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = NFS_CLIENT(inode),
		.rpc_message = &msg,
		.callback_ops = &nfs_commit_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC,
		.priority = priority,
	};

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	list_splice_init(head, &data->pages);

	data->inode = inode;
	data->cred = msg.rpc_cred;

	data->args.fh = NFS_FH(data->inode);
	/* Note: we always request a commit of the entire inode */
	data->args.offset = 0;
	data->args.count = 0;
	data->args.context = get_nfs_open_context(first->wb_context);
	data->res.count = 0;
	data->res.fattr = &data->fattr;
	data->res.verf = &data->verf;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->commit_setup(data, &msg);

	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}

/*
 * Commit dirty pages
 */
static int
nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
	struct nfs_write_data *data;
	struct nfs_page *req;

	data = nfs_commitdata_alloc();

	if (!data)
		goto out_bad;

	/* Set up the argument struct */
	return nfs_commit_rpcsetup(head, data, how);
out_bad:
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req);
		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
				BDI_RECLAIMABLE);
		nfs_clear_page_tag_locked(req);
	}
	nfs_commit_clear_lock(NFS_I(inode));
	return -ENOMEM;
}

/*
 * COMMIT call returned
 */
static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	dprintk("NFS: %5u nfs_commit_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/* Call the NFS version-specific code */
	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
		return;
}

static void nfs_commit_release(void *calldata)
{
	struct nfs_write_data *data = calldata;
	struct nfs_page *req;
	int status = data->task.tk_status;

	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		nfs_clear_request_commit(req);

		dprintk("NFS: commit (%s/%lld %d@%lld)",
			req->wb_context->path.dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
		if (status < 0) {
			nfs_context_set_write_error(req->wb_context, status);
			nfs_inode_remove_request(req);
			dprintk(", error = %d\n", status);
			goto next;
		}

		/* Okay, COMMIT succeeded, apparently. Check the verifier
		 * returned by the server against all stored verfs. */
		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
			/* We have a match */
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		/* We have a mismatch. Write the page again */
		dprintk(" mismatch\n");
		nfs_mark_request_dirty(req);
	next:
		nfs_clear_page_tag_locked(req);
	}
	nfs_commit_clear_lock(NFS_I(data->inode));
	nfs_commitdata_release(calldata);
}

static const struct rpc_call_ops nfs_commit_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_write_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_commit_done,
	.rpc_release = nfs_commit_release,
};

int nfs_commit_inode(struct inode *inode, int how)
{
	LIST_HEAD(head);
	int may_wait = how & FLUSH_SYNC;
	int res = 0;

	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
		goto out_mark_dirty;
	spin_lock(&inode->i_lock);
	res = nfs_scan_commit(inode, &head, 0, 0);
	spin_unlock(&inode->i_lock);
	if (res) {
		int error = nfs_commit_list(inode, &head, how);
		if (error < 0)
			return error;
		if (may_wait)
			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
					nfs_wait_bit_killable,
					TASK_KILLABLE);
		else
			goto out_mark_dirty;
	} else
		nfs_commit_clear_lock(NFS_I(inode));
	return res;
	/* Note: If we exit without ensuring that the commit is complete,
	 *	 we must mark the inode as dirty. Otherwise, future calls to
	 *	 sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
	 *	 that the data is on the disk.
	 */
out_mark_dirty:
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return res;
}

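/*
 * For a WB_SYNC_NONE (background) flush, nfs_commit_unstable_pages()
 * below only issues a COMMIT once more than half of the inode's
 * outstanding pages are waiting for one; otherwise it just re-marks
 * the inode dirty and leaves the commit for later.
 */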
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int flags = FLUSH_SYNC;
	int ret = 0;

	if (wbc->sync_mode == WB_SYNC_NONE) {
		/* Don't commit yet if this is a non-blocking flush and there
		 * are a lot of outstanding writes for this mapping.
		 */
		if (nfsi->ncommit <= (nfsi->npages >> 1))
			goto out_mark_dirty;

		/* don't wait for the COMMIT response */
		flags = 0;
	}

	ret = nfs_commit_inode(inode, flags);
	if (ret >= 0) {
		if (wbc->sync_mode == WB_SYNC_NONE) {
			if (ret < wbc->nr_to_write)
				wbc->nr_to_write -= ret;
			else
				wbc->nr_to_write = 0;
		}
		return 0;
	}
out_mark_dirty:
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return ret;
}
#else
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	return 0;
}
#endif

int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	return nfs_commit_unstable_pages(inode, wbc);
}

/*
 * flush the inode to disk.
 */
int nfs_wb_all(struct inode *inode)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	return sync_inode(inode, &wbc);
}

int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
	struct nfs_page *req;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	for (;;) {
		wait_on_page_writeback(page);
		req = nfs_page_find_request(page);
		if (req == NULL)
			break;
		if (nfs_lock_request_dontget(req)) {
			nfs_inode_remove_request(req);
			/*
			 * In case nfs_inode_remove_request has marked the
			 * page as being dirty
			 */
			cancel_dirty_page(page, PAGE_CACHE_SIZE);
			nfs_unlock_request(req);
			break;
		}
		ret = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (ret < 0)
			break;
	}
	return ret;
}

/*
 * Write back all requests on one page - we do this before reading it.
 */
int nfs_wb_page(struct inode *inode, struct page *page)
{
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret;

	for (;;) {
		wait_on_page_writeback(page);
		if (clear_page_dirty_for_io(page)) {
			ret = nfs_writepage_locked(page, &wbc);
			if (ret < 0)
				goto out_error;
			continue;
		}
		if (!PagePrivate(page))
			break;
		ret = nfs_commit_inode(inode, FLUSH_SYNC);
		if (ret < 0)
			goto out_error;
	}
	return 0;
out_error:
	return ret;
}

#ifdef CONFIG_MIGRATION
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page)
{
	struct nfs_page *req;
	int ret;

	nfs_fscache_release_page(page, GFP_KERNEL);

	req = nfs_find_and_lock_request(page, false);
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	ret = migrate_page(mapping, newpage, page);
	if (!req)
		goto out;
	if (ret)
		goto out_unlock;
	page_cache_get(newpage);
	spin_lock(&mapping->host->i_lock);
	req->wb_page = newpage;
	SetPagePrivate(newpage);
	set_page_private(newpage, (unsigned long)req);
	ClearPagePrivate(page);
	set_page_private(page, 0);
	spin_unlock(&mapping->host->i_lock);
	page_cache_release(page);
out_unlock:
	nfs_clear_page_tag_locked(req);
out:
	return ret;
}
#endif

int __init nfs_init_writepagecache(void)
{
	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
					     sizeof(struct nfs_write_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_wdata_cachep == NULL)
		return -ENOMEM;

	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
						     nfs_wdata_cachep);
	if (nfs_wdata_mempool == NULL)
		return -ENOMEM;

	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
						      nfs_wdata_cachep);
	if (nfs_commit_mempool == NULL)
		return -ENOMEM;

	/*
	 * NFS congestion size, scale with available memory.
	 *
	 *  64MB:    8192k
	 * 128MB:   11585k
	 * 256MB:   16384k
	 * 512MB:   23170k
	 *   1GB:   32768k
	 *   2GB:   46340k
	 *   4GB:   65536k
	 *   8GB:   92681k
	 *  16GB:  131072k
	 *
	 * This allows larger machines to have larger/more transfers.
	 * Limit the default to 256M
	 */
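	/*
	 * A rough worked example of the formula below, assuming 4K pages:
	 * a 256MB machine has 65536 pages, int_sqrt(65536) = 256, and
	 * 16 * 256 = 4096 pages, i.e. 16384k once shifted into kilobytes,
	 * matching the table above.
	 */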
	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
	if (nfs_congestion_kb > 256*1024)
		nfs_congestion_kb = 256*1024;

	return 0;
}

void nfs_destroy_writepagecache(void)
{
	mempool_destroy(nfs_commit_mempool);
	mempool_destroy(nfs_wdata_mempool);
	kmem_cache_destroy(nfs_wdata_cachep);
}