/*
 * linux/fs/nfs/write.c
 *
 * Write file data over NFS.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/migrate.h>

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>

#include <asm/uaccess.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "nfs4_fs.h"
#include "fscache.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

#define MIN_POOL_WRITE		(32)
#define MIN_POOL_COMMIT		(4)

/*
 * Local function declarations
 */
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
				  struct inode *inode, int ioflags);
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_write_partial_ops;
static const struct rpc_call_ops nfs_write_full_ops;
static const struct rpc_call_ops nfs_commit_ops;

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;

struct nfs_write_data *nfs_commitdata_alloc(void)
{
	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
	}
	return p;
}

void nfs_commit_free(struct nfs_write_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_commit_mempool);
}

struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
{
	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
			if (!p->pagevec) {
				mempool_free(p, nfs_wdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

void nfs_writedata_free(struct nfs_write_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_wdata_mempool);
}

static void nfs_writedata_release(struct nfs_write_data *wdata)
{
	put_nfs_open_context(wdata->args.context);
	nfs_writedata_free(wdata);
}

static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
	ctx->error = error;
	smp_wmb();
	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}

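/*
 * Look up the nfs_page attached to @page via page_private(), taking an
 * extra reference on it.  Returns NULL if the page carries no request.
 * The caller must hold inode->i_lock.
 */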
static struct nfs_page *nfs_page_find_request_locked(struct page *page)
{
	struct nfs_page *req = NULL;

	if (PagePrivate(page)) {
		req = (struct nfs_page *)page_private(page);
		if (req != NULL)
			kref_get(&req->wb_kref);
	}
	return req;
}

static struct nfs_page *nfs_page_find_request(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req = NULL;

	spin_lock(&inode->i_lock);
	req = nfs_page_find_request_locked(page);
	spin_unlock(&inode->i_lock);
	return req;
}

/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
	struct inode *inode = page->mapping->host;
	loff_t end, i_size;
	pgoff_t end_index;

	spin_lock(&inode->i_lock);
	i_size = i_size_read(inode);
	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
	if (i_size > 0 && page->index < end_index)
		goto out;
	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
	if (i_size >= end)
		goto out;
	i_size_write(inode, end);
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
	spin_unlock(&inode->i_lock);
}

/* A writeback failed: mark the page as bad, and invalidate the page cache */
static void nfs_set_pageerror(struct page *page)
{
	SetPageError(page);
	nfs_zap_mapping(page->mapping->host, page->mapping);
}

/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
{
	if (PageUptodate(page))
		return;
	if (base != 0)
		return;
	if (count != nfs_page_length(page))
		return;
	SetPageUptodate(page);
}

static int wb_priority(struct writeback_control *wbc)
{
	if (wbc->for_reclaim)
		return FLUSH_HIGHPRI | FLUSH_STABLE;
	if (wbc->for_kupdate)
		return FLUSH_LOWPRI;
	return 0;
}

/*
 * NFS congestion control
 */

int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))

static int nfs_set_page_writeback(struct page *page)
{
	int ret = test_set_page_writeback(page);

	if (!ret) {
		struct inode *inode = page->mapping->host;
		struct nfs_server *nfss = NFS_SERVER(inode);

		if (atomic_long_inc_return(&nfss->writeback) >
				NFS_CONGESTION_ON_THRESH) {
			set_bdi_congested(&nfss->backing_dev_info,
						BLK_RW_ASYNC);
		}
	}
	return ret;
}

static void nfs_end_page_writeback(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_server *nfss = NFS_SERVER(inode);

	end_page_writeback(page);
	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}

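/*
 * Find the write request attached to @page and lock it for I/O.
 *
 * Returns the locked request with an extra reference held, NULL if the
 * page has no request attached, or an ERR_PTR if waiting for a locked
 * request was interrupted by a fatal signal.
 */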
static struct nfs_page *nfs_find_and_lock_request(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int ret;

	spin_lock(&inode->i_lock);
	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			break;
		if (nfs_set_page_tag_locked(req))
			break;
		/* Note: If we hold the page lock, as is the case in nfs_writepage,
		 *	 then the call to nfs_set_page_tag_locked() will always
		 *	 succeed provided that someone hasn't already marked the
		 *	 request as dirty (in which case we don't care).
		 */
		spin_unlock(&inode->i_lock);
		ret = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (ret != 0)
			return ERR_PTR(ret);
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
	return req;
}

/*
 * Find an associated nfs write request, and prepare to flush it out
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
				struct page *page)
{
	struct nfs_page *req;
	int ret = 0;

	req = nfs_find_and_lock_request(page);
	if (!req)
		goto out;
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	ret = nfs_set_page_writeback(page);
	BUG_ON(ret != 0);
	BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));

	if (!nfs_pageio_add_request(pgio, req)) {
		nfs_redirty_request(req);
		ret = pgio->pg_error;
	}
out:
	return ret;
}

static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
{
	struct inode *inode = page->mapping->host;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);

	nfs_pageio_cond_complete(pgio, page->index);
	return nfs_page_async_flush(pgio, page);
}

/*
 * Write an mmapped page to the server.
 */
static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	struct nfs_pageio_descriptor pgio;
	int err;

	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
	err = nfs_do_writepage(page, wbc, &pgio);
	nfs_pageio_complete(&pgio);
	if (err < 0)
		return err;
	if (pgio.pg_error < 0)
		return pgio.pg_error;
	return 0;
}

int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_writepage_locked(page, wbc);
	unlock_page(page);
	return ret;
}

static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
{
	int ret;

	ret = nfs_do_writepage(page, wbc, data);
	unlock_page(page);
	return ret;
}

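/*
 * Write out all dirty pages of the given address_space.  Only one flusher
 * runs per inode at a time: the NFS_INO_FLUSHING bit is held for the
 * duration so that concurrent callers wait.
 */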
int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	unsigned long *bitlock = &NFS_I(inode)->flags;
	struct nfs_pageio_descriptor pgio;
	int err;

	/* Stop dirtying of new pages while we sync */
	err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
			nfs_wait_bit_killable, TASK_KILLABLE);
	if (err)
		goto out_err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
	nfs_pageio_complete(&pgio);

	clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
	smp_mb__after_clear_bit();
	wake_up_bit(bitlock, NFS_INO_FLUSHING);

	if (err < 0)
		goto out_err;
	err = pgio.pg_error;
	if (err < 0)
		goto out_err;
	return 0;
out_err:
	return err;
}

/*
 * Insert a write request into an inode
 */
static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int error;

	error = radix_tree_preload(GFP_NOFS);
	if (error != 0)
		goto out;

	/* Lock the request! */
	nfs_lock_request_dontget(req);

	spin_lock(&inode->i_lock);
	error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
	BUG_ON(error);
	if (!nfsi->npages) {
		igrab(inode);
		if (nfs_have_delegation(inode, FMODE_WRITE))
			nfsi->change_attr++;
	}
	SetPagePrivate(req->wb_page);
	set_page_private(req->wb_page, (unsigned long)req);
	nfsi->npages++;
	kref_get(&req->wb_kref);
	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
				NFS_PAGE_TAG_LOCKED);
	spin_unlock(&inode->i_lock);
	radix_tree_preload_end();
out:
	return error;
}

/*
 * Remove a write request from an inode
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	BUG_ON(!NFS_WBACK_BUSY(req));

	spin_lock(&inode->i_lock);
	set_page_private(req->wb_page, 0);
	ClearPagePrivate(req->wb_page);
	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
	nfsi->npages--;
	if (!nfsi->npages) {
		spin_unlock(&inode->i_lock);
		iput(inode);
	} else
		spin_unlock(&inode->i_lock);
	nfs_clear_request(req);
	nfs_release_request(req);
}

static void
nfs_mark_request_dirty(struct nfs_page *req)
{
	__set_page_dirty_nobuffers(req->wb_page);
}

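/*
 * NFSv3 and NFSv4 allow UNSTABLE writes that must later be flushed to disk
 * with a COMMIT.  The helpers below track which requests are still waiting
 * for such a commit; for NFSv2 they compile away to no-ops, since every
 * NFSv2 write is FILE_SYNC.
 */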
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/*
 * Add a request to the inode's commit list.
 */
static void
nfs_mark_request_commit(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	spin_lock(&inode->i_lock);
	set_bit(PG_CLEAN, &(req)->wb_flags);
	radix_tree_tag_set(&nfsi->nfs_page_tree,
			req->wb_index,
			NFS_PAGE_TAG_COMMIT);
	spin_unlock(&inode->i_lock);
	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}

static int
nfs_clear_request_commit(struct nfs_page *req)
{
	struct page *page = req->wb_page;

	if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
		dec_zone_page_state(page, NR_UNSTABLE_NFS);
		dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
		return 1;
	}
	return 0;
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	return data->verf.committed != NFS_FILE_SYNC;
}

static inline
int nfs_reschedule_unstable_write(struct nfs_page *req)
{
	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
		nfs_mark_request_commit(req);
		return 1;
	}
	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
		nfs_mark_request_dirty(req);
		return 1;
	}
	return 0;
}
#else
static inline void
nfs_mark_request_commit(struct nfs_page *req)
{
}

static inline int
nfs_clear_request_commit(struct nfs_page *req)
{
	return 0;
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	return 0;
}

static inline
int nfs_reschedule_unstable_write(struct nfs_page *req)
{
	return 0;
}
#endif

/*
 * Wait for a request to complete.
 *
 * Interruptible by fatal signals only.
 */
static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page *req;
	pgoff_t idx_end, next;
	unsigned int res = 0;
	int error;

	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	next = idx_start;
	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
		if (req->wb_index > idx_end)
			break;

		next = req->wb_index + 1;
		BUG_ON(!NFS_WBACK_BUSY(req));

		kref_get(&req->wb_kref);
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		spin_lock(&inode->i_lock);
		if (error < 0)
			return error;
		res++;
	}
	return res;
}

static void nfs_cancel_commit_list(struct list_head *head)
{
	struct nfs_page *req;

	while(!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_clear_request_commit(req);
		nfs_inode_remove_request(req);
		nfs_unlock_request(req);
	}
}

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int
nfs_need_commit(struct nfs_inode *nfsi)
{
	return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT);
}

/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: destination list
 * @idx_start: lower bound of page->index to scan.
 * @npages: idx_start + npages sets the upper bound to scan.
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
static int
nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	if (!nfs_need_commit(nfsi))
		return 0;

	return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
}
#else
static inline int nfs_need_commit(struct nfs_inode *nfsi)
{
	return 0;
}

static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
{
	return 0;
}
#endif

/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 */
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	if (!PagePrivate(page))
		return NULL;

	end = offset + bytes;
	spin_lock(&inode->i_lock);

	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			goto out_unlock;

		rqend = req->wb_offset + req->wb_bytes;
		/*
		 * Tell the caller to flush out the request if
		 * the offsets are non-contiguous.
		 * Note: nfs_flush_incompatible() will already
		 * have flushed out requests having wrong owners.
		 */
		if (offset > rqend
		    || end < req->wb_offset)
			goto out_flushme;

		if (nfs_set_page_tag_locked(req))
			break;

		/* The request is locked, so wait and then retry */
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error != 0)
			goto out_err;
		spin_lock(&inode->i_lock);
	}

	if (nfs_clear_request_commit(req))
		radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
				req->wb_index, NFS_PAGE_TAG_COMMIT);

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
out_unlock:
	spin_unlock(&inode->i_lock);
	return req;
out_flushme:
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
	error = nfs_wb_page(inode, page);
out_err:
	return ERR_PTR(error);
}

/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page *nfs_setup_write_request(struct nfs_open_context *ctx,
		struct page *page, unsigned int offset, unsigned int bytes)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int error;

	req = nfs_try_to_update_request(inode, page, offset, bytes);
	if (req != NULL)
		goto out;
	req = nfs_create_request(ctx, inode, page, offset, bytes);
	if (IS_ERR(req))
		goto out;
	error = nfs_inode_add_request(inode, req);
	if (error != 0) {
		nfs_release_request(req);
		req = ERR_PTR(error);
	}
out:
	return req;
}

static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page *req;

	req = nfs_setup_write_request(ctx, page, offset, count);
	if (IS_ERR(req))
		return PTR_ERR(req);
	/* Update file length */
	nfs_grow_file(page, offset, count);
	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
	nfs_clear_page_tag_locked(req);
	return 0;
}

int nfs_flush_incompatible(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct nfs_page *req;
	int do_flush, status;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	do {
		req = nfs_page_find_request(page);
		if (req == NULL)
			return 0;
		do_flush = req->wb_page != page || req->wb_context != ctx;
		nfs_release_request(req);
		if (!do_flush)
			return 0;
		status = nfs_wb_page(page->mapping->host, page);
	} while (status == 0);
	return status;
}

/*
 * If the page cache is marked as unsafe or invalid, then we can't rely on
 * the PageUptodate() flag. In this case, we will need to turn off
 * write optimisations that depend on the page contents being correct.
 */
static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
	return PageUptodate(page) &&
		!(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
}

/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int nfs_updatepage(struct file *file, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct inode *inode = page->mapping->host;
	int status = 0;

	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);

	dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n",
		file->f_path.dentry->d_parent->d_name.name,
		file->f_path.dentry->d_name.name, count,
		(long long)(page_offset(page) + offset));

	/* If we're not using byte range locks, and we know the page
	 * is up to date, it may be more efficient to extend the write
	 * to cover the entire page in order to avoid fragmentation
	 * inefficiencies.
	 */
	if (nfs_write_pageuptodate(page, inode) &&
			inode->i_flock == NULL &&
			!(file->f_flags & O_SYNC)) {
		count = max(count + offset, nfs_page_length(page));
		offset = 0;
	}

	status = nfs_writepage_setup(ctx, page, offset, count);
	if (status < 0)
		nfs_set_pageerror(page);
	else
		__set_page_dirty_nobuffers(page);

	dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
			status, (long long)i_size_read(inode));
	return status;
}

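/*
 * Release a request once the last RPC covering its page has completed:
 * end writeback on the page and, unless the write still has to be
 * rescheduled (redirtied or committed), remove the request from the inode.
 */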
static void nfs_writepage_release(struct nfs_page *req)
{

	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
		nfs_end_page_writeback(req->wb_page);
		nfs_inode_remove_request(req);
	} else
		nfs_end_page_writeback(req->wb_page);
	nfs_clear_page_tag_locked(req);
}

static int flush_task_priority(int how)
{
	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
		case FLUSH_HIGHPRI:
			return RPC_PRIORITY_HIGH;
		case FLUSH_LOWPRI:
			return RPC_PRIORITY_LOW;
	}
	return RPC_PRIORITY_NORMAL;
}

/*
 * Set up the argument/result storage required for the RPC call.
 */
static int nfs_write_rpcsetup(struct nfs_page *req,
		struct nfs_write_data *data,
		const struct rpc_call_ops *call_ops,
		unsigned int count, unsigned int offset,
		int how)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
	int priority = flush_task_priority(how);
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = req->wb_context->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.rpc_client = NFS_CLIENT(inode),
		.task = &data->task,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = flags,
		.priority = priority,
	};

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	data->req = req;
	data->inode = inode = req->wb_context->path.dentry->d_inode;
	data->cred = msg.rpc_cred;

	data->args.fh = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pagevec;
	data->args.count = count;
	data->args.context = get_nfs_open_context(req->wb_context);
	data->args.stable = NFS_UNSTABLE;
	if (how & FLUSH_STABLE) {
		data->args.stable = NFS_DATA_SYNC;
		if (!nfs_need_commit(NFS_I(inode)))
			data->args.stable = NFS_FILE_SYNC;
	}

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.verf = &data->verf;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->write_setup(data, &msg);

	dprintk("NFS: %5u initiated write call "
		"(req %s/%lld, %u bytes @ offset %llu)\n",
		data->task.tk_pid,
		inode->i_sb->s_id,
		(long long)NFS_FILEID(inode),
		count,
		(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}

/* If an nfs_flush_* function fails, it should remove reqs from @head and
 * call this on each, which will prepare them to be retried on next
 * writeback using standard nfs.
 */
static void nfs_redirty_request(struct nfs_page *req)
{
	nfs_mark_request_dirty(req);
	nfs_end_page_writeback(req->wb_page);
	nfs_clear_page_tag_locked(req);
}

/*
 * Generate multiple small requests to write out a single
 * contiguous dirty area on one page.
 */
static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
{
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct nfs_write_data *data;
	size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
	unsigned int offset;
	int requests = 0;
	int ret = 0;
	LIST_HEAD(list);

	nfs_list_remove_request(req);

	nbytes = count;
	do {
		size_t len = min(nbytes, wsize);

		data = nfs_writedata_alloc(1);
		if (!data)
			goto out_bad;
		list_add(&data->pages, &list);
		requests++;
		nbytes -= len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);

	ClearPageError(page);
	offset = 0;
	nbytes = count;
	do {
		int ret2;

		data = list_entry(list.next, struct nfs_write_data, pages);
		list_del_init(&data->pages);

		data->pagevec[0] = page;

		if (nbytes < wsize)
			wsize = nbytes;
		ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
				wsize, offset, how);
		if (ret == 0)
			ret = ret2;
		offset += wsize;
		nbytes -= wsize;
	} while (nbytes != 0);

	return ret;

out_bad:
	while (!list_empty(&list)) {
		data = list_entry(list.next, struct nfs_write_data, pages);
		list_del(&data->pages);
		nfs_writedata_release(data);
	}
	nfs_redirty_request(req);
	return -ENOMEM;
}

/*
 * Create an RPC task for the given write request and kick it.
 * The page must have been locked by the caller.
 *
 * It may happen that the page we're passed is not marked dirty.
 * This is the case if nfs_updatepage detects a conflicting request
 * that has been written but not committed.
 */
static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
{
	struct nfs_page *req;
	struct page **pages;
	struct nfs_write_data *data;

	data = nfs_writedata_alloc(npages);
	if (!data)
		goto out_bad;

	pages = data->pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		*pages++ = req->wb_page;
	}
	req = nfs_list_entry(data->pages.next);

	/* Set up the argument struct */
	return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
out_bad:
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_redirty_request(req);
	}
	return -ENOMEM;
}

static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
				  struct inode *inode, int ioflags)
{
	size_t wsize = NFS_SERVER(inode)->wsize;

	if (wsize < PAGE_CACHE_SIZE)
		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
	else
		nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
}

/*
 * Handle a write reply that flushed part of a page.
 */
static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	dprintk("NFS: %5u write(%s/%lld %d@%lld)",
		task->tk_pid,
		data->req->wb_context->path.dentry->d_inode->i_sb->s_id,
		(long long)
		NFS_FILEID(data->req->wb_context->path.dentry->d_inode),
		data->req->wb_bytes, (long long)req_offset(data->req));

	nfs_writeback_done(task, data);
}

static void nfs_writeback_release_partial(void *calldata)
{
	struct nfs_write_data *data = calldata;
	struct nfs_page *req = data->req;
	struct page *page = req->wb_page;
	int status = data->task.tk_status;

	if (status < 0) {
		nfs_set_pageerror(page);
		nfs_context_set_write_error(req->wb_context, status);
		dprintk(", error = %d\n", status);
		goto out;
	}

	if (nfs_write_need_commit(data)) {
		struct inode *inode = page->mapping->host;

		spin_lock(&inode->i_lock);
		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
			/* Do nothing; we need to resend the writes */
		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
			dprintk(" defer commit\n");
		} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
			set_bit(PG_NEED_RESCHED, &req->wb_flags);
			clear_bit(PG_NEED_COMMIT, &req->wb_flags);
			dprintk(" server reboot detected\n");
		}
		spin_unlock(&inode->i_lock);
	} else
		dprintk(" OK\n");

out:
	if (atomic_dec_and_test(&req->wb_complete))
		nfs_writepage_release(req);
	nfs_writedata_release(calldata);
}

#if defined(CONFIG_NFS_V4_1)
void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;
	struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client;

	if (nfs4_setup_sequence(clp, &data->args.seq_args,
				&data->res.seq_res, 1, task))
		return;
	rpc_call_start(task);
}
#endif /* CONFIG_NFS_V4_1 */

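/*
 * RPC callbacks used when a dirty page range is sent as several smaller
 * WRITE calls (wsize < PAGE_CACHE_SIZE).
 */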
static const struct rpc_call_ops nfs_write_partial_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_write_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_writeback_done_partial,
	.rpc_release = nfs_writeback_release_partial,
};

/*
 * Handle a write reply that flushes a whole page.
 *
 * FIXME: There is an inherent race with invalidate_inode_pages and
 *	  writebacks since the page->count is kept > 1 for as long
 *	  as the page has a write request pending.
 */
static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	nfs_writeback_done(task, data);
}

static void nfs_writeback_release_full(void *calldata)
{
	struct nfs_write_data *data = calldata;
	int status = data->task.tk_status;

	/* Update attributes as result of writeback. */
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);
		struct page *page = req->wb_page;

		nfs_list_remove_request(req);

		dprintk("NFS: %5u write (%s/%lld %d@%lld)",
			data->task.tk_pid,
			req->wb_context->path.dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));

		if (status < 0) {
			nfs_set_pageerror(page);
			nfs_context_set_write_error(req->wb_context, status);
			dprintk(", error = %d\n", status);
			goto remove_request;
		}

		if (nfs_write_need_commit(data)) {
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
			nfs_mark_request_commit(req);
			nfs_end_page_writeback(page);
			dprintk(" marked for commit\n");
			goto next;
		}
		dprintk(" OK\n");
remove_request:
		nfs_end_page_writeback(page);
		nfs_inode_remove_request(req);
next:
		nfs_clear_page_tag_locked(req);
	}
	nfs_writedata_release(calldata);
}

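/*
 * RPC callbacks used when one or more whole pages are written with a
 * single WRITE call.
 */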
static const struct rpc_call_ops nfs_write_full_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_write_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_writeback_done_full,
	.rpc_release = nfs_writeback_release_full,
};


/*
 * This function is called when the WRITE call is complete.
 */
int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
	struct nfs_writeargs *argp = &data->args;
	struct nfs_writeres *resp = &data->res;
	struct nfs_server *server = NFS_SERVER(data->inode);
	int status;

	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/*
	 * ->write_done will attempt to use post-op attributes to detect
	 * conflicting writes by other clients.  A strict interpretation
	 * of close-to-open would allow us to continue caching even if
	 * another writer had changed the file, but some applications
	 * depend on tighter cache coherency when writing.
	 */
	status = NFS_PROTO(data->inode)->write_done(task, data);
	if (status != 0)
		return status;
	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
		/* We tried a write call, but the server did not
		 * commit data to stable storage even though we
		 * requested it.
		 * Note: There is a known bug in Tru64 < 5.0 in which
		 *	 the server reports NFS_DATA_SYNC, but performs
		 *	 NFS_FILE_SYNC. We therefore implement this checking
		 *	 as a dprintk() in order to avoid filling syslog.
		 */
		static unsigned long complain;

		if (time_before(complain, jiffies)) {
			dprintk("NFS: faulty NFS server %s:"
				" (committed = %d) != (stable = %d)\n",
				server->nfs_client->cl_hostname,
				resp->verf->committed, argp->stable);
			complain = jiffies + 300 * HZ;
		}
	}
#endif
	/* Is this a short write? */
	if (task->tk_status >= 0 && resp->count < argp->count) {
		static unsigned long complain;

		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);

		/* Has the server at least made some progress? */
		if (resp->count != 0) {
			/* Was this an NFSv2 write or an NFSv3 stable write? */
			if (resp->verf->committed != NFS_UNSTABLE) {
				/* Resend from where the server left off */
				argp->offset += resp->count;
				argp->pgbase += resp->count;
				argp->count -= resp->count;
			} else {
				/* Resend as a stable write in order to avoid
				 * headaches in the case of a server crash.
				 */
				argp->stable = NFS_FILE_SYNC;
			}
			nfs4_restart_rpc(task, server->nfs_client);
			return -EAGAIN;
		}
		if (time_before(complain, jiffies)) {
			printk(KERN_WARNING
			       "NFS: Server wrote zero bytes, expected %u.\n",
					argp->count);
			complain = jiffies + 300 * HZ;
		}
		/* Can't do anything about it except throw an error. */
		task->tk_status = -EIO;
	}
	nfs4_sequence_free_slot(server->nfs_client, &data->res.seq_res);
	return 0;
}


#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
void nfs_commitdata_release(void *data)
{
	struct nfs_write_data *wdata = data;

	put_nfs_open_context(wdata->args.context);
	nfs_commit_free(wdata);
}

/*
 * Set up the argument/result storage required for the RPC call.
 */
static int nfs_commit_rpcsetup(struct list_head *head,
		struct nfs_write_data *data,
		int how)
{
	struct nfs_page *first = nfs_list_entry(head->next);
	struct inode *inode = first->wb_context->path.dentry->d_inode;
	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
	int priority = flush_task_priority(how);
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = first->wb_context->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = NFS_CLIENT(inode),
		.rpc_message = &msg,
		.callback_ops = &nfs_commit_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = flags,
		.priority = priority,
	};

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	list_splice_init(head, &data->pages);

	data->inode = inode;
	data->cred = msg.rpc_cred;

	data->args.fh = NFS_FH(data->inode);
	/* Note: we always request a commit of the entire inode */
	data->args.offset = 0;
	data->args.count = 0;
	data->args.context = get_nfs_open_context(first->wb_context);
	data->res.count = 0;
	data->res.fattr = &data->fattr;
	data->res.verf = &data->verf;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->commit_setup(data, &msg);

	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}

/*
 * Commit dirty pages
 */
static int
nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
	struct nfs_write_data *data;
	struct nfs_page *req;

	data = nfs_commitdata_alloc();

	if (!data)
		goto out_bad;

	/* Set up the argument struct */
	return nfs_commit_rpcsetup(head, data, how);
out_bad:
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req);
		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
				BDI_RECLAIMABLE);
		nfs_clear_page_tag_locked(req);
	}
	return -ENOMEM;
}

/*
 * COMMIT call returned
 */
static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	dprintk("NFS: %5u nfs_commit_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/* Call the NFS version-specific code */
	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
		return;
}

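/*
 * Process the requests covered by a completed COMMIT.  A request is done
 * when the verifier stored at WRITE time matches the one returned by the
 * COMMIT; a mismatch means the server rebooted in between, so the page
 * is redirtied and the data written again.
 */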
static void nfs_commit_release(void *calldata)
{
	struct nfs_write_data *data = calldata;
	struct nfs_page *req;
	int status = data->task.tk_status;

	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		nfs_clear_request_commit(req);

		dprintk("NFS: commit (%s/%lld %d@%lld)",
			req->wb_context->path.dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
		if (status < 0) {
			nfs_context_set_write_error(req->wb_context, status);
			nfs_inode_remove_request(req);
			dprintk(", error = %d\n", status);
			goto next;
		}

		/* Okay, COMMIT succeeded, apparently. Check the verifier
		 * returned by the server against all stored verfs. */
		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
			/* We have a match */
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		/* We have a mismatch. Write the page again */
		dprintk(" mismatch\n");
		nfs_mark_request_dirty(req);
next:
		nfs_clear_page_tag_locked(req);
	}
	nfs_commitdata_release(calldata);
}

static const struct rpc_call_ops nfs_commit_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_write_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_commit_done,
	.rpc_release = nfs_commit_release,
};

int nfs_commit_inode(struct inode *inode, int how)
{
	LIST_HEAD(head);
	int res;

	spin_lock(&inode->i_lock);
	res = nfs_scan_commit(inode, &head, 0, 0);
	spin_unlock(&inode->i_lock);
	if (res) {
		int error = nfs_commit_list(inode, &head, how);
		if (error < 0)
			return error;
	}
	return res;
}
#else
static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
	return 0;
}
#endif

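/*
 * Wait for all outstanding write requests in the range described by @wbc
 * and, unless FLUSH_NOCOMMIT is set in @how, send COMMITs for any requests
 * that end up on the commit list.  Returns a negative error on failure.
 */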
long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
{
	struct inode *inode = mapping->host;
	pgoff_t idx_start, idx_end;
	unsigned int npages = 0;
	LIST_HEAD(head);
	int nocommit = how & FLUSH_NOCOMMIT;
	long pages, ret;

	/* FIXME */
	if (wbc->range_cyclic)
		idx_start = 0;
	else {
		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
		idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (idx_end > idx_start) {
			pgoff_t l_npages = 1 + idx_end - idx_start;
			npages = l_npages;
			if (sizeof(npages) != sizeof(l_npages) &&
					(pgoff_t)npages != l_npages)
				npages = 0;
		}
	}
	how &= ~FLUSH_NOCOMMIT;
	spin_lock(&inode->i_lock);
	do {
		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
		if (ret != 0)
			continue;
		if (nocommit)
			break;
		pages = nfs_scan_commit(inode, &head, idx_start, npages);
		if (pages == 0)
			break;
		if (how & FLUSH_INVALIDATE) {
			spin_unlock(&inode->i_lock);
			nfs_cancel_commit_list(&head);
			ret = pages;
			spin_lock(&inode->i_lock);
			continue;
		}
		pages += nfs_scan_commit(inode, &head, 0, 0);
		spin_unlock(&inode->i_lock);
		ret = nfs_commit_list(inode, &head, how);
		spin_lock(&inode->i_lock);

	} while (ret >= 0);
	spin_unlock(&inode->i_lock);
	return ret;
}

static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
{
	int ret;

	ret = nfs_writepages(mapping, wbc);
	if (ret < 0)
		goto out;
	ret = nfs_sync_mapping_wait(mapping, wbc, how);
	if (ret < 0)
		goto out;
	return 0;
out:
	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
	return ret;
}

/* Write out all dirty pages, then wait for and commit the resulting requests */
static int nfs_write_mapping(struct address_space *mapping, int how)
{
	struct writeback_control wbc = {
		.bdi = mapping->backing_dev_info,
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	return __nfs_write_mapping(mapping, &wbc, how);
}

/*
 * flush the inode to disk.
 */
int nfs_wb_all(struct inode *inode)
{
	return nfs_write_mapping(inode->i_mapping, 0);
}

int nfs_wb_nocommit(struct inode *inode)
{
	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
}

int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
	struct nfs_page *req;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.bdi = page->mapping->backing_dev_info,
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret = 0;

	BUG_ON(!PageLocked(page));
	for (;;) {
		req = nfs_page_find_request(page);
		if (req == NULL)
			goto out;
		if (test_bit(PG_CLEAN, &req->wb_flags)) {
			nfs_release_request(req);
			break;
		}
		if (nfs_lock_request_dontget(req)) {
			nfs_inode_remove_request(req);
			/*
			 * In case nfs_inode_remove_request has marked the
			 * page as being dirty
			 */
			cancel_dirty_page(page, PAGE_CACHE_SIZE);
			nfs_unlock_request(req);
			break;
		}
		ret = nfs_wait_on_request(req);
		if (ret < 0)
			goto out;
	}
	if (!PagePrivate(page))
		return 0;
	ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
out:
	return ret;
}

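/*
 * Write back and wait on any request attached to a single page, using the
 * FLUSH_* flags in @how to control the stability of the resulting writes.
 */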
static int nfs_wb_page_priority(struct inode *inode, struct page *page,
				int how)
{
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.bdi = page->mapping->backing_dev_info,
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret;

	do {
		if (clear_page_dirty_for_io(page)) {
			ret = nfs_writepage_locked(page, &wbc);
			if (ret < 0)
				goto out_error;
		} else if (!PagePrivate(page))
			break;
		ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
		if (ret < 0)
			goto out_error;
	} while (PagePrivate(page));
	return 0;
out_error:
	__mark_inode_dirty(inode, I_DIRTY_PAGES);
	return ret;
}

/*
 * Write back all requests on one page - we do this before reading it.
 */
int nfs_wb_page(struct inode *inode, struct page *page)
{
	return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
}

#ifdef CONFIG_MIGRATION
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page)
{
	struct nfs_page *req;
	int ret;

	if (PageFsCache(page))
		nfs_fscache_release_page(page, GFP_KERNEL);

	req = nfs_find_and_lock_request(page);
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	ret = migrate_page(mapping, newpage, page);
	if (!req)
		goto out;
	if (ret)
		goto out_unlock;
	page_cache_get(newpage);
	req->wb_page = newpage;
	SetPagePrivate(newpage);
	set_page_private(newpage, page_private(page));
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
out_unlock:
	nfs_clear_page_tag_locked(req);
	nfs_release_request(req);
out:
	return ret;
}
#endif

int __init nfs_init_writepagecache(void)
{
	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
					     sizeof(struct nfs_write_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_wdata_cachep == NULL)
		return -ENOMEM;

	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
						     nfs_wdata_cachep);
	if (nfs_wdata_mempool == NULL)
		return -ENOMEM;

	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
						      nfs_wdata_cachep);
	if (nfs_commit_mempool == NULL)
		return -ENOMEM;

	/*
	 * NFS congestion size, scale with available memory.
	 *
	 *  64MB:    8192k
	 * 128MB:   11585k
	 * 256MB:   16384k
	 * 512MB:   23170k
	 *   1GB:   32768k
	 *   2GB:   46340k
	 *   4GB:   65536k
	 *   8GB:   92681k
	 *  16GB:  131072k
	 *
	 * This allows larger machines to have larger/more transfers.
	 * Limit the default to 256M
	 */
	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
	if (nfs_congestion_kb > 256*1024)
		nfs_congestion_kb = 256*1024;

	return 0;
}

void nfs_destroy_writepagecache(void)
{
	mempool_destroy(nfs_commit_mempool);
	mempool_destroy(nfs_wdata_mempool);
	kmem_cache_destroy(nfs_wdata_cachep);
}