/* handling of writes to regular files and writing back to the server
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/backing-dev.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "internal.h"

static int afs_write_back_from_locked_page(struct afs_writeback *wb,
                                           struct page *page);

/*
 * mark a page as having been made dirty and thus needing writeback
 */
int afs_set_page_dirty(struct page *page)
{
        _enter("");
        return __set_page_dirty_nobuffers(page);
}

/*
 * unlink a writeback record because its usage has reached zero
 * - must be called with the wb->vnode->writeback_lock held
 */
static void afs_unlink_writeback(struct afs_writeback *wb)
{
        struct afs_writeback *front;
        struct afs_vnode *vnode = wb->vnode;

        list_del_init(&wb->link);
        if (!list_empty(&vnode->writebacks)) {
                /* if an fsync rises to the front of the queue then wake it
                 * up */
                front = list_entry(vnode->writebacks.next,
                                   struct afs_writeback, link);
                if (front->state == AFS_WBACK_SYNCING) {
                        _debug("wake up sync");
                        front->state = AFS_WBACK_COMPLETE;
                        wake_up(&front->waitq);
                }
        }
}

/*
 * free a writeback record
 */
static void afs_free_writeback(struct afs_writeback *wb)
{
        _enter("");
        key_put(wb->key);
        kfree(wb);
}

/*
 * dispose of a reference to a writeback record
 */
void afs_put_writeback(struct afs_writeback *wb)
{
        struct afs_vnode *vnode = wb->vnode;

        _enter("{%d}", wb->usage);

        spin_lock(&vnode->writeback_lock);
        if (--wb->usage == 0)
                afs_unlink_writeback(wb);
        else
                wb = NULL;
        spin_unlock(&vnode->writeback_lock);
        if (wb)
                afs_free_writeback(wb);
}

/*
 * partly or wholly fill a page that's under preparation for writing
 */
static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
                         loff_t pos, struct page *page)
{
        struct afs_read *req;
        loff_t i_size;
        int ret;

        _enter(",,%llu", (unsigned long long)pos);

        req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
                      GFP_KERNEL);
        if (!req)
                return -ENOMEM;

        atomic_set(&req->usage, 1);
        req->pos = pos;
        req->nr_pages = 1;
        req->pages[0] = page;

        i_size = i_size_read(&vnode->vfs_inode);
        if (pos + PAGE_SIZE > i_size)
                req->len = i_size - pos;
        else
                req->len = PAGE_SIZE;

        ret = afs_vnode_fetch_data(vnode, key, req);
        afs_put_read(req);
        if (ret < 0) {
                if (ret == -ENOENT) {
                        _debug("got NOENT from server"
                               " - marking file deleted and stale");
                        set_bit(AFS_VNODE_DELETED, &vnode->flags);
                        ret = -ESTALE;
                }
        }

        _leave(" = %d", ret);
        return ret;
}

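/*
 * Note on dirty-page tracking in this file: each dirty page carries a pointer
 * to an afs_writeback record in page_private(), and the records themselves
 * live on vnode->writebacks.  afs_write_begin() below tries to merge a new
 * write into an existing AFS_WBACK_PENDING record for the same key - either
 * the one the page already belongs to, or one that covers the immediately
 * preceding page - and only starts a new record if no such record exists.  A
 * page bound to a record under a different key must be flushed before the
 * new write can proceed.
 */
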
/*
 * prepare to perform part of a write to a page
 */
int afs_write_begin(struct file *file, struct address_space *mapping,
                    loff_t pos, unsigned len, unsigned flags,
                    struct page **pagep, void **fsdata)
{
        struct afs_writeback *candidate, *wb;
        struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
        struct page *page;
        struct key *key = file->private_data;
        unsigned from = pos & (PAGE_SIZE - 1);
        unsigned to = from + len;
        pgoff_t index = pos >> PAGE_SHIFT;
        int ret;

        _enter("{%x:%u},{%lx},%u,%u",
               vnode->fid.vid, vnode->fid.vnode, index, from, to);

        candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
        if (!candidate)
                return -ENOMEM;
        candidate->vnode = vnode;
        candidate->first = candidate->last = index;
        candidate->offset_first = from;
        candidate->to_last = to;
        INIT_LIST_HEAD(&candidate->link);
        candidate->usage = 1;
        candidate->state = AFS_WBACK_PENDING;
        init_waitqueue_head(&candidate->waitq);

        page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page) {
                kfree(candidate);
                return -ENOMEM;
        }
        *pagep = page;
        /* page won't leak in error case: it eventually gets cleaned off LRU */

        if (!PageUptodate(page) && len != PAGE_SIZE) {
                ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page);
                if (ret < 0) {
                        kfree(candidate);
                        _leave(" = %d [prep]", ret);
                        return ret;
                }
                SetPageUptodate(page);
        }

try_again:
        spin_lock(&vnode->writeback_lock);

        /* see if this page is already pending a writeback under a suitable key
         * - if so we can just join onto that one */
        wb = (struct afs_writeback *) page_private(page);
        if (wb) {
                if (wb->key == key && wb->state == AFS_WBACK_PENDING)
                        goto subsume_in_current_wb;
                goto flush_conflicting_wb;
        }

        if (index > 0) {
                /* see if we can find an already pending writeback that we can
                 * append this page to */
                list_for_each_entry(wb, &vnode->writebacks, link) {
                        if (wb->last == index - 1 && wb->key == key &&
                            wb->state == AFS_WBACK_PENDING)
                                goto append_to_previous_wb;
                }
        }

        list_add_tail(&candidate->link, &vnode->writebacks);
        candidate->key = key_get(key);
        spin_unlock(&vnode->writeback_lock);
        SetPagePrivate(page);
        set_page_private(page, (unsigned long) candidate);
        _leave(" = 0 [new]");
        return 0;

subsume_in_current_wb:
        _debug("subsume");
        ASSERTRANGE(wb->first, <=, index, <=, wb->last);
        if (index == wb->first && from < wb->offset_first)
                wb->offset_first = from;
        if (index == wb->last && to > wb->to_last)
                wb->to_last = to;
        spin_unlock(&vnode->writeback_lock);
        kfree(candidate);
        _leave(" = 0 [sub]");
        return 0;

append_to_previous_wb:
        _debug("append into %lx-%lx", wb->first, wb->last);
        wb->usage++;
        wb->last++;
        wb->to_last = to;
        spin_unlock(&vnode->writeback_lock);
        SetPagePrivate(page);
        set_page_private(page, (unsigned long) wb);
        kfree(candidate);
        _leave(" = 0 [app]");
        return 0;

        /* the page is currently bound to another context, so if it's dirty we
         * need to flush it before we can use the new context */
flush_conflicting_wb:
        _debug("flush conflict");
        if (wb->state == AFS_WBACK_PENDING)
                wb->state = AFS_WBACK_CONFLICTING;
        spin_unlock(&vnode->writeback_lock);
        if (PageDirty(page)) {
                ret = afs_write_back_from_locked_page(wb, page);
                if (ret < 0) {
                        afs_put_writeback(candidate);
                        _leave(" = %d", ret);
                        return ret;
                }
        }

        /* the page holds a ref on the writeback record */
        afs_put_writeback(wb);
        set_page_private(page, 0);
        ClearPagePrivate(page);
        goto try_again;
}

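/*
 * Note: afs_write_end() below runs with the page still locked from
 * afs_write_begin().  It only ever extends i_size - the re-check under
 * writeback_lock stops a racing writer's larger value from being overwritten
 * with a smaller one - then marks the page dirty and releases it; the data
 * itself is pushed to the server later by the writeback paths.
 */
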
/*
 * finalise part of a write to a page
 */
int afs_write_end(struct file *file, struct address_space *mapping,
                  loff_t pos, unsigned len, unsigned copied,
                  struct page *page, void *fsdata)
{
        struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
        loff_t i_size, maybe_i_size;

        _enter("{%x:%u},{%lx}",
               vnode->fid.vid, vnode->fid.vnode, page->index);

        maybe_i_size = pos + copied;

        i_size = i_size_read(&vnode->vfs_inode);
        if (maybe_i_size > i_size) {
                spin_lock(&vnode->writeback_lock);
                i_size = i_size_read(&vnode->vfs_inode);
                if (maybe_i_size > i_size)
                        i_size_write(&vnode->vfs_inode, maybe_i_size);
                spin_unlock(&vnode->writeback_lock);
        }

        set_page_dirty(page);
        if (PageDirty(page))
                _debug("dirtied");
        unlock_page(page);
        put_page(page);

        return copied;
}

/*
 * kill all the pages in the given range
 */
static void afs_kill_pages(struct afs_vnode *vnode, bool error,
                           pgoff_t first, pgoff_t last)
{
        struct pagevec pv;
        unsigned count, loop;

        _enter("{%x:%u},%lx-%lx",
               vnode->fid.vid, vnode->fid.vnode, first, last);

        pagevec_init(&pv, 0);

        do {
                _debug("kill %lx-%lx", first, last);

                count = last - first + 1;
                if (count > PAGEVEC_SIZE)
                        count = PAGEVEC_SIZE;
                pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
                                              first, count, pv.pages);
                ASSERTCMP(pv.nr, ==, count);

                for (loop = 0; loop < count; loop++) {
                        ClearPageUptodate(pv.pages[loop]);
                        if (error)
                                SetPageError(pv.pages[loop]);
                        end_page_writeback(pv.pages[loop]);
                }

                __pagevec_release(&pv);

                /* advance to the next batch of pages; without this the loop
                 * never terminates for ranges larger than one pagevec */
                first += count;
        } while (first < last);

        _leave("");
}

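/*
 * Pages are gathered in batches of up to ARRAY_SIZE(pages) and are only taken
 * if they are immediately lockable, still dirty and still attached to the
 * same writeback record; the first page that fails those tests ends the run.
 * On success the return value is the number of pages handed to
 * afs_vnode_store_data(), which the writepage paths subtract from
 * wbc->nr_to_write.
 */
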
/*
 * synchronously write back the locked page and any subsequent non-locked dirty
 * pages also covered by the same writeback record
 */
static int afs_write_back_from_locked_page(struct afs_writeback *wb,
                                           struct page *primary_page)
{
        struct page *pages[8], *page;
        unsigned long count;
        unsigned n, offset, to;
        pgoff_t start, first, last;
        int loop, ret;

        _enter(",%lx", primary_page->index);

        count = 1;
        if (!clear_page_dirty_for_io(primary_page))
                BUG();
        if (test_set_page_writeback(primary_page))
                BUG();

        /* find all consecutive lockable dirty pages, stopping when we find a
         * page that is not immediately lockable, is not dirty or is missing,
         * or we reach the end of the range */
        start = primary_page->index;
        if (start >= wb->last)
                goto no_more;
        start++;
        do {
                _debug("more %lx [%lx]", start, count);
                n = wb->last - start + 1;
                if (n > ARRAY_SIZE(pages))
                        n = ARRAY_SIZE(pages);
                n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping,
                                          start, n, pages);
                _debug("fgpc %u", n);
                if (n == 0)
                        goto no_more;
                if (pages[0]->index != start) {
                        do {
                                put_page(pages[--n]);
                        } while (n > 0);
                        goto no_more;
                }

                for (loop = 0; loop < n; loop++) {
                        page = pages[loop];
                        if (page->index > wb->last)
                                break;
                        if (!trylock_page(page))
                                break;
                        if (!PageDirty(page) ||
                            page_private(page) != (unsigned long) wb) {
                                unlock_page(page);
                                break;
                        }
                        if (!clear_page_dirty_for_io(page))
                                BUG();
                        if (test_set_page_writeback(page))
                                BUG();
                        unlock_page(page);
                        put_page(page);
                }

                count += loop;
                if (loop < n) {
                        for (; loop < n; loop++)
                                put_page(pages[loop]);
                        goto no_more;
                }

                start += loop;
        } while (start <= wb->last && count < 65536);

no_more:
        /* we now have a contiguous set of dirty pages, each with writeback set
         * and the dirty mark cleared; the first page is locked and must remain
         * so, all the rest are unlocked */
        first = primary_page->index;
        last = first + count - 1;

        offset = (first == wb->first) ? wb->offset_first : 0;
        to = (last == wb->last) ? wb->to_last : PAGE_SIZE;

        _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);

        ret = afs_vnode_store_data(wb, first, last, offset, to);
        if (ret < 0) {
                switch (ret) {
                case -EDQUOT:
                case -ENOSPC:
                        mapping_set_error(wb->vnode->vfs_inode.i_mapping, -ENOSPC);
                        break;
                case -EROFS:
                case -EIO:
                case -EREMOTEIO:
                case -EFBIG:
                case -ENOENT:
                case -ENOMEDIUM:
                case -ENXIO:
                        afs_kill_pages(wb->vnode, true, first, last);
                        mapping_set_error(wb->vnode->vfs_inode.i_mapping, -EIO);
                        break;
                case -EACCES:
                case -EPERM:
                case -ENOKEY:
                case -EKEYEXPIRED:
                case -EKEYREJECTED:
                case -EKEYREVOKED:
                        afs_kill_pages(wb->vnode, false, first, last);
                        break;
                default:
                        break;
                }
        } else {
                ret = count;
        }

        _leave(" = %d", ret);
        return ret;
}

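/*
 * Note: afs_writepage() below intentionally returns 0 even when the store
 * fails; the failure has already been recorded against the mapping and/or
 * the pages by afs_write_back_from_locked_page().  The ->writepages path,
 * by contrast, propagates the error to its caller.
 */
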
/*
 * write a page back to the server
 * - the caller locked the page for us
 */
int afs_writepage(struct page *page, struct writeback_control *wbc)
{
        struct afs_writeback *wb;
        int ret;

        _enter("{%lx},", page->index);

        wb = (struct afs_writeback *) page_private(page);
        ASSERT(wb != NULL);

        ret = afs_write_back_from_locked_page(wb, page);
        unlock_page(page);
        if (ret < 0) {
                _leave(" = %d", ret);
                return 0;
        }

        wbc->nr_to_write -= ret;

        _leave(" = 0");
        return 0;
}

/*
 * write a region of pages back to the server
 */
static int afs_writepages_region(struct address_space *mapping,
                                 struct writeback_control *wbc,
                                 pgoff_t index, pgoff_t end, pgoff_t *_next)
{
        struct afs_writeback *wb;
        struct page *page;
        int ret, n;

        _enter(",,%lx,%lx,", index, end);

        do {
                n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
                                       1, &page);
                if (!n)
                        break;

                _debug("wback %lx", page->index);

                if (page->index > end) {
                        *_next = index;
                        put_page(page);
                        _leave(" = 0 [%lx]", *_next);
                        return 0;
                }

                /* at this point we hold neither mapping->tree_lock nor lock on
                 * the page itself: the page may be truncated or invalidated
                 * (changing page->mapping to NULL), or even swizzled back from
                 * swapper_space to tmpfs file mapping
                 */
                lock_page(page);

                if (page->mapping != mapping) {
                        unlock_page(page);
                        put_page(page);
                        continue;
                }

                if (wbc->sync_mode != WB_SYNC_NONE)
                        wait_on_page_writeback(page);

                if (PageWriteback(page) || !PageDirty(page)) {
                        unlock_page(page);
                        continue;
                }

                wb = (struct afs_writeback *) page_private(page);
                ASSERT(wb != NULL);

                spin_lock(&wb->vnode->writeback_lock);
                wb->state = AFS_WBACK_WRITING;
                spin_unlock(&wb->vnode->writeback_lock);

                ret = afs_write_back_from_locked_page(wb, page);
                unlock_page(page);
                put_page(page);
                if (ret < 0) {
                        _leave(" = %d", ret);
                        return ret;
                }

                wbc->nr_to_write -= ret;

                cond_resched();
        } while (index < end && wbc->nr_to_write > 0);

        *_next = index;
        _leave(" = 0 [%lx]", *_next);
        return 0;
}

/*
 * write some of the pending data back to the server
 */
int afs_writepages(struct address_space *mapping,
                   struct writeback_control *wbc)
{
        pgoff_t start, end, next;
        int ret;

        _enter("");

        if (wbc->range_cyclic) {
                start = mapping->writeback_index;
                end = -1;
                ret = afs_writepages_region(mapping, wbc, start, end, &next);
                if (start > 0 && wbc->nr_to_write > 0 && ret == 0)
                        ret = afs_writepages_region(mapping, wbc, 0, start,
                                                    &next);
                mapping->writeback_index = next;
        } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
                end = (pgoff_t)(LLONG_MAX >> PAGE_SHIFT);
                ret = afs_writepages_region(mapping, wbc, 0, end, &next);
                if (wbc->nr_to_write > 0)
                        mapping->writeback_index = next;
        } else {
                start = wbc->range_start >> PAGE_SHIFT;
                end = wbc->range_end >> PAGE_SHIFT;
                ret = afs_writepages_region(mapping, wbc, start, end, &next);
        }

        _leave(" = %d", ret);
        return ret;
}

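/*
 * afs_pages_written_back() runs when the store to the server completes: it
 * clears the writeback bit on each page in the stored range, detaches pages
 * that still point at this record and drops their references on it, and frees
 * the record once its usage count reaches zero (waking any fsync marker that
 * has reached the front of the queue via afs_unlink_writeback()).
 */
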
/*
 * completion of write to server
 */
void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
{
        struct afs_writeback *wb = call->wb;
        struct pagevec pv;
        unsigned count, loop;
        pgoff_t first = call->first, last = call->last;
        bool free_wb;

        _enter("{%x:%u},{%lx-%lx}",
               vnode->fid.vid, vnode->fid.vnode, first, last);

        ASSERT(wb != NULL);

        pagevec_init(&pv, 0);

        do {
                _debug("done %lx-%lx", first, last);

                count = last - first + 1;
                if (count > PAGEVEC_SIZE)
                        count = PAGEVEC_SIZE;
                pv.nr = find_get_pages_contig(call->mapping, first, count,
                                              pv.pages);
                ASSERTCMP(pv.nr, ==, count);

                spin_lock(&vnode->writeback_lock);
                for (loop = 0; loop < count; loop++) {
                        struct page *page = pv.pages[loop];
                        end_page_writeback(page);
                        if (page_private(page) == (unsigned long) wb) {
                                set_page_private(page, 0);
                                ClearPagePrivate(page);
                                wb->usage--;
                        }
                }
                free_wb = false;
                if (wb->usage == 0) {
                        afs_unlink_writeback(wb);
                        free_wb = true;
                }
                spin_unlock(&vnode->writeback_lock);
                first += count;
                if (free_wb) {
                        afs_free_writeback(wb);
                        wb = NULL;
                }

                __pagevec_release(&pv);
        } while (first <= last);

        _leave("");
}

/*
 * write to an AFS file
 */
ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
        struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
        ssize_t result;
        size_t count = iov_iter_count(from);

        _enter("{%x.%u},{%zu},",
               vnode->fid.vid, vnode->fid.vnode, count);

        if (IS_SWAPFILE(&vnode->vfs_inode)) {
                printk(KERN_INFO
                       "AFS: Attempt to write to active swap file!\n");
                return -EBUSY;
        }

        if (!count)
                return 0;

        result = generic_file_write_iter(iocb, from);

        _leave(" = %zd", result);
        return result;
}

/*
 * flush the vnode to the fileserver
 */
int afs_writeback_all(struct afs_vnode *vnode)
{
        struct address_space *mapping = vnode->vfs_inode.i_mapping;
        struct writeback_control wbc = {
                .sync_mode      = WB_SYNC_ALL,
                .nr_to_write    = LONG_MAX,
                .range_cyclic   = 1,
        };
        int ret;

        _enter("");

        ret = mapping->a_ops->writepages(mapping, &wbc);
        __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

        _leave(" = %d", ret);
        return ret;
}

/*
 * flush any dirty pages for this process, and check for write errors.
 * - the return status from this call provides a reliable indication of
 *   whether any write errors occurred for this process.
 */
int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
        struct inode *inode = file_inode(file);
        struct afs_writeback *wb, *xwb;
        struct afs_vnode *vnode = AFS_FS_I(inode);
        int ret;

        _enter("{%x:%u},{n=%pD},%d",
               vnode->fid.vid, vnode->fid.vnode, file,
               datasync);

        ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (ret)
                return ret;
        inode_lock(inode);

        /* use a writeback record as a marker in the queue - when this reaches
         * the front of the queue, all the outstanding writes are either
         * completed or rejected */
        wb = kzalloc(sizeof(*wb), GFP_KERNEL);
        if (!wb) {
                ret = -ENOMEM;
                goto out;
        }
        wb->vnode = vnode;
        wb->first = 0;
        wb->last = -1;
        wb->offset_first = 0;
        wb->to_last = PAGE_SIZE;
        wb->usage = 1;
        wb->state = AFS_WBACK_SYNCING;
        init_waitqueue_head(&wb->waitq);

        spin_lock(&vnode->writeback_lock);
        list_for_each_entry(xwb, &vnode->writebacks, link) {
                if (xwb->state == AFS_WBACK_PENDING)
                        xwb->state = AFS_WBACK_CONFLICTING;
        }
        list_add_tail(&wb->link, &vnode->writebacks);
        spin_unlock(&vnode->writeback_lock);

        /* push all the outstanding writebacks to the server */
        ret = afs_writeback_all(vnode);
        if (ret < 0) {
                afs_put_writeback(wb);
                _leave(" = %d [wb]", ret);
                goto out;
        }

        /* wait for the preceding writes to actually complete */
        ret = wait_event_interruptible(wb->waitq,
                                       wb->state == AFS_WBACK_COMPLETE ||
                                       vnode->writebacks.next == &wb->link);
        afs_put_writeback(wb);
        _leave(" = %d", ret);
out:
        inode_unlock(inode);
        return ret;
}

/*
 * notification that a previously read-only page is about to become writable
 * - if it returns an error, the caller will deliver a bus error signal
 */
int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
        struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host);

        _enter("{{%x:%u}},{%lx}",
               vnode->fid.vid, vnode->fid.vnode, page->index);

        /* wait for the page to be written to the cache before we allow it to
         * be modified */
#ifdef CONFIG_AFS_FSCACHE
        fscache_wait_on_page_write(vnode->cache, page);
#endif

        _leave(" = 0");
        return 0;
}