// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level write support.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/pagevec.h>
#include "internal.h"

/*
 * Determined write method.  Adjust netfs_folio_traces if this is changed.
 */
enum netfs_how_to_modify {
	NETFS_FOLIO_IS_UPTODATE,	/* Folio is uptodate already */
	NETFS_JUST_PREFETCH,		/* We have to read the folio anyway */
	NETFS_WHOLE_FOLIO_MODIFY,	/* We're going to overwrite the whole folio */
	NETFS_MODIFY_AND_CLEAR,		/* We can assume there is no data to be downloaded. */
	NETFS_STREAMING_WRITE,		/* Store incomplete data in non-uptodate page. */
	NETFS_STREAMING_WRITE_CONT,	/* Continue streaming write. */
	NETFS_FLUSH_CONTENT,		/* Flush incompatible content. */
};

static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq);

static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
{
	if (netfs_group && !folio_get_private(folio))
		folio_attach_private(folio, netfs_get_group(netfs_group));
}

#if IS_ENABLED(CONFIG_FSCACHE)
static void netfs_folio_start_fscache(bool caching, struct folio *folio)
{
	if (caching)
		folio_start_fscache(folio);
}
#else
static void netfs_folio_start_fscache(bool caching, struct folio *folio)
{
}
#endif

/*
 * Decide how we should modify a folio.  We might be attempting to do
 * write-streaming, in which case we don't want to do a local RMW cycle if we
 * can avoid it.  If we're doing local caching or content crypto, we award
 * that priority over avoiding RMW.  If the file is open readably, then we
 * also assume that we may want to read what we wrote.
 */
static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
						    struct file *file,
						    struct folio *folio,
						    void *netfs_group,
						    size_t flen,
						    size_t offset,
						    size_t len,
						    bool maybe_trouble)
{
	struct netfs_folio *finfo = netfs_folio_info(folio);
	loff_t pos = folio_file_pos(folio);

	_enter("");

	if (netfs_folio_group(folio) != netfs_group)
		return NETFS_FLUSH_CONTENT;

	if (folio_test_uptodate(folio))
		return NETFS_FOLIO_IS_UPTODATE;

	if (pos >= ctx->zero_point)
		return NETFS_MODIFY_AND_CLEAR;

	if (!maybe_trouble && offset == 0 && len >= flen)
		return NETFS_WHOLE_FOLIO_MODIFY;

	if (file->f_mode & FMODE_READ)
		goto no_write_streaming;
	if (test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags))
		goto no_write_streaming;

	if (netfs_is_cache_enabled(ctx)) {
		/* We don't want to get a streaming write on a file that loses
		 * caching service temporarily because the backing store got
		 * culled.
		 */
		if (!test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags))
			set_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags);
		goto no_write_streaming;
	}

	if (!finfo)
		return NETFS_STREAMING_WRITE;

	/* We can continue a streaming write only if it continues on from the
	 * previous.  If it overlaps, we must flush lest we suffer a partial
	 * copy and disjoint dirty regions.
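	 *
	 * For example, if the folio already carries a streaming write covering
	 * bytes 0-99, a new write beginning at byte 100 simply extends that
	 * record, whereas one beginning at byte 50 would overlap it and so
	 * forces a flush first.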
	 */
	if (offset == finfo->dirty_offset + finfo->dirty_len)
		return NETFS_STREAMING_WRITE_CONT;
	return NETFS_FLUSH_CONTENT;

no_write_streaming:
	if (finfo) {
		netfs_stat(&netfs_n_wh_wstream_conflict);
		return NETFS_FLUSH_CONTENT;
	}
	return NETFS_JUST_PREFETCH;
}

/*
 * Grab a folio for writing and lock it.  Attempt to allocate as large a folio
 * as possible to hold as much of the remaining length as possible in one go.
 */
static struct folio *netfs_grab_folio_for_write(struct address_space *mapping,
						loff_t pos, size_t part)
{
	pgoff_t index = pos / PAGE_SIZE;
	fgf_t fgp_flags = FGP_WRITEBEGIN;

	if (mapping_large_folio_support(mapping))
		fgp_flags |= fgf_set_order(pos % PAGE_SIZE + part);

	return __filemap_get_folio(mapping, index, fgp_flags,
				   mapping_gfp_mask(mapping));
}

/**
 * netfs_perform_write - Copy data into the pagecache.
 * @iocb: The operation parameters
 * @iter: The source buffer
 * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
 *
 * Copy data into pagecache pages attached to the inode specified by @iocb.
 * The caller must hold appropriate inode locks.
 *
 * Dirty pages are tagged with a netfs_folio struct if they're not up to date
 * to indicate the range modified.  Dirty pages may also be tagged with a
 * netfs-specific grouping such that data from an old group gets flushed before
 * a new one is started.
 */
ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
			    struct netfs_group *netfs_group)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct address_space *mapping = inode->i_mapping;
	struct netfs_inode *ctx = netfs_inode(inode);
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_NONE,
		.for_sync	= true,
		.nr_to_write	= LONG_MAX,
		.range_start	= iocb->ki_pos,
		.range_end	= iocb->ki_pos + iter->count,
	};
	struct netfs_io_request *wreq = NULL;
	struct netfs_folio *finfo;
	struct folio *folio;
	enum netfs_how_to_modify howto;
	enum netfs_folio_trace trace;
	unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ?
		0 : BDP_ASYNC;
	ssize_t written = 0, ret;
	loff_t i_size, pos = iocb->ki_pos, from, to;
	size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
	bool maybe_trouble = false;

	if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) ||
		     iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))
	    ) {
		if (pos < i_size_read(inode)) {
			ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count);
			if (ret < 0) {
				goto out;
			}
		}

		wbc_attach_fdatawrite_inode(&wbc, mapping->host);

		wreq = netfs_begin_writethrough(iocb, iter->count);
		if (IS_ERR(wreq)) {
			wbc_detach_inode(&wbc);
			ret = PTR_ERR(wreq);
			wreq = NULL;
			goto out;
		}
		if (!is_sync_kiocb(iocb))
			wreq->iocb = iocb;
		wreq->cleanup = netfs_cleanup_buffered_write;
	}

	do {
		size_t flen;
		size_t offset;	/* Offset into pagecache folio */
		size_t part;	/* Bytes to write to folio */
		size_t copied;	/* Bytes copied from user */

		ret = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags);
		if (unlikely(ret < 0))
			break;

		offset = pos & (max_chunk - 1);
		part = min(max_chunk - offset, iov_iter_count(iter));

		/* Bring in the user pages that we will copy from _first_ lest
		 * we hit a nasty deadlock on copying from the same page as
		 * we're writing to, without it being marked uptodate.
		 *
		 * Not only is this an optimisation, but it is also required to
		 * check that the address is actually valid, when atomic
		 * usercopies are used below.
		 *
		 * We rely on the page being held onto long enough by the LRU
		 * that we can grab it below if this causes it to be read.
		 */
		ret = -EFAULT;
		if (unlikely(fault_in_iov_iter_readable(iter, part) == part))
			break;

		folio = netfs_grab_folio_for_write(mapping, pos, part);
		if (IS_ERR(folio)) {
			ret = PTR_ERR(folio);
			break;
		}

		flen = folio_size(folio);
		offset = pos & (flen - 1);
		part = min_t(size_t, flen - offset, part);

		if (signal_pending(current)) {
			ret = written ? -EINTR : -ERESTARTSYS;
			goto error_folio_unlock;
		}

		/* See if we need to prefetch the area we're going to modify.
		 * We need to do this before we get a lock on the folio in case
		 * there's more than one writer competing for the same cache
		 * block.
		 */
		howto = netfs_how_to_modify(ctx, file, folio, netfs_group,
					    flen, offset, part, maybe_trouble);
		_debug("howto %u", howto);
		switch (howto) {
		case NETFS_JUST_PREFETCH:
			ret = netfs_prefetch_for_write(file, folio, offset, part);
			if (ret < 0) {
				_debug("prefetch = %zd", ret);
				goto error_folio_unlock;
			}
			break;
		case NETFS_FOLIO_IS_UPTODATE:
		case NETFS_WHOLE_FOLIO_MODIFY:
		case NETFS_STREAMING_WRITE_CONT:
			break;
		case NETFS_MODIFY_AND_CLEAR:
			zero_user_segment(&folio->page, 0, offset);
			break;
		case NETFS_STREAMING_WRITE:
			ret = -EIO;
			if (WARN_ON(folio_get_private(folio)))
				goto error_folio_unlock;
			break;
		case NETFS_FLUSH_CONTENT:
			trace_netfs_folio(folio, netfs_flush_content);
			from = folio_pos(folio);
			to = from + folio_size(folio) - 1;
			folio_unlock(folio);
			folio_put(folio);
			ret = filemap_write_and_wait_range(mapping, from, to);
			if (ret < 0)
				goto error_folio_unlock;
			continue;
		}

		if (mapping_writably_mapped(mapping))
			flush_dcache_folio(folio);

		copied = copy_folio_from_iter_atomic(folio, offset, part, iter);

		flush_dcache_folio(folio);

		/* Deal with a (partially) failed copy */
		if (copied == 0) {
			ret = -EFAULT;
			goto error_folio_unlock;
		}

		trace = (enum netfs_folio_trace)howto;
		switch (howto) {
		case NETFS_FOLIO_IS_UPTODATE:
		case NETFS_JUST_PREFETCH:
			netfs_set_group(folio, netfs_group);
			break;
		case NETFS_MODIFY_AND_CLEAR:
			zero_user_segment(&folio->page, offset + copied, flen);
			netfs_set_group(folio, netfs_group);
			folio_mark_uptodate(folio);
			break;
		case NETFS_WHOLE_FOLIO_MODIFY:
			if (unlikely(copied < part)) {
				maybe_trouble = true;
				iov_iter_revert(iter, copied);
				copied = 0;
				goto retry;
			}
			netfs_set_group(folio, netfs_group);
			folio_mark_uptodate(folio);
			break;
		case NETFS_STREAMING_WRITE:
			if (offset == 0 && copied == flen) {
				netfs_set_group(folio, netfs_group);
				folio_mark_uptodate(folio);
				trace = netfs_streaming_filled_page;
				break;
			}
			finfo = kzalloc(sizeof(*finfo), GFP_KERNEL);
			if (!finfo) {
				iov_iter_revert(iter, copied);
				ret = -ENOMEM;
				goto error_folio_unlock;
			}
			finfo->netfs_group = netfs_get_group(netfs_group);
			finfo->dirty_offset = offset;
			finfo->dirty_len = copied;
			folio_attach_private(folio, (void *)((unsigned long)finfo |
							     NETFS_FOLIO_INFO));
			break;
		case NETFS_STREAMING_WRITE_CONT:
			finfo = netfs_folio_info(folio);
			finfo->dirty_len += copied;
			if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) {
				if (finfo->netfs_group)
					folio_change_private(folio, finfo->netfs_group);
				else
					folio_detach_private(folio);
				folio_mark_uptodate(folio);
				kfree(finfo);
				trace = netfs_streaming_cont_filled_page;
			}
			break;
		default:
			WARN(true, "Unexpected modify type %u ix=%lx\n",
			     howto, folio->index);
			ret = -EIO;
			goto error_folio_unlock;
		}

		trace_netfs_folio(folio, trace);

		/* Update the inode size if we moved the EOF marker */
		i_size = i_size_read(inode);
		pos += copied;
		if (pos > i_size) {
			if (ctx->ops->update_i_size) {
				ctx->ops->update_i_size(inode, pos);
			} else {
				i_size_write(inode, pos);
#if IS_ENABLED(CONFIG_FSCACHE)
				fscache_update_cookie(ctx->cache, NULL, &pos);
#endif
			}
		}
		written += copied;

		if (likely(!wreq)) {
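			/* Ordinary buffered write: just mark the folio dirty
			 * and leave it for writeback (netfs_writepages) to
			 * send to the server and the cache later.
			 */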
			folio_mark_dirty(folio);
		} else {
			if (folio_test_dirty(folio))
				/* Sigh.  mmap. */
				folio_clear_dirty_for_io(folio);
			/* We make multiple writes to the folio... */
			if (!folio_test_writeback(folio)) {
				folio_wait_fscache(folio);
				folio_start_writeback(folio);
				folio_start_fscache(folio);
				if (wreq->iter.count == 0)
					trace_netfs_folio(folio, netfs_folio_trace_wthru);
				else
					trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
			}
			netfs_advance_writethrough(wreq, copied,
						   offset + copied == flen);
		}
	retry:
		folio_unlock(folio);
		folio_put(folio);
		folio = NULL;

		cond_resched();
	} while (iov_iter_count(iter));

out:
	if (unlikely(wreq)) {
		ret = netfs_end_writethrough(wreq, iocb);
		wbc_detach_inode(&wbc);
		if (ret == -EIOCBQUEUED)
			return ret;
	}

	iocb->ki_pos += written;
	_leave(" = %zd [%zd]", written, ret);
	return written ? written : ret;

error_folio_unlock:
	folio_unlock(folio);
	folio_put(folio);
	goto out;
}
EXPORT_SYMBOL(netfs_perform_write);

/**
 * netfs_buffered_write_iter_locked - write data to a file
 * @iocb: IO state structure (file, offset, etc.)
 * @from: iov_iter with data to write
 * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
 *
 * This function does all the work needed for actually writing data to a
 * file.  It does all basic checks, removes SUID from the file, updates
 * modification times and calls proper subroutines depending on whether we
 * do direct IO or a standard buffered write.
 *
 * The caller must hold appropriate locks around this function and have called
 * generic_write_checks() already.  The caller is also responsible for doing
 * any necessary syncing afterwards.
 *
 * This function does *not* take care of syncing data in case of O_SYNC write.
 * A caller has to handle it.  This is mainly due to the fact that we want to
 * avoid syncing under i_rwsem.
 *
 * Return:
 * * number of bytes written, even for truncated writes
 * * negative error code if no data has been written at all
 */
ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
					 struct netfs_group *netfs_group)
{
	struct file *file = iocb->ki_filp;
	ssize_t ret;

	trace_netfs_write_iter(iocb, from);

	ret = file_remove_privs(file);
	if (ret)
		return ret;

	ret = file_update_time(file);
	if (ret)
		return ret;

	return netfs_perform_write(iocb, from, netfs_group);
}
EXPORT_SYMBOL(netfs_buffered_write_iter_locked);

/**
 * netfs_file_write_iter - write data to a file
 * @iocb: IO state structure
 * @from: iov_iter with data to write
 *
 * Perform a write to a file, writing into the pagecache if possible and doing
 * an unbuffered write instead if not.
 *
 * Return:
 * * Negative error code if no data has been written at all or
 *   vfs_fsync_range() failed for a synchronous write
 * * Number of bytes written, even for truncated writes
 */
ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct netfs_inode *ictx = netfs_inode(inode);
	ssize_t ret;

	_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));

	if (!iov_iter_count(from))
		return 0;

	if ((iocb->ki_flags & IOCB_DIRECT) ||
	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
		return netfs_unbuffered_write_iter(iocb, from);

	ret = netfs_start_io_write(inode);
	if (ret < 0)
		return ret;

	ret = generic_write_checks(iocb, from);
	if (ret > 0)
		ret = netfs_buffered_write_iter_locked(iocb, from, NULL);
	netfs_end_io_write(inode);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}
EXPORT_SYMBOL(netfs_file_write_iter);

/*
 * Notification that a previously read-only page is about to become writable.
 * Note that the caller indicates a single page of a multipage folio.
 */
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group)
{
	struct folio *folio = page_folio(vmf->page);
	struct file *file = vmf->vma->vm_file;
	struct inode *inode = file_inode(file);
	vm_fault_t ret = VM_FAULT_RETRY;
	int err;

	_enter("%lx", folio->index);

	sb_start_pagefault(inode->i_sb);

	if (folio_wait_writeback_killable(folio))
		goto out;

	if (folio_lock_killable(folio) < 0)
		goto out;

	/* Can we see a streaming write here?
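	 * A folio carrying a partial streaming write is never marked
	 * uptodate, so hitting one in a writable mapping would be unexpected.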
	 */
	if (WARN_ON(!folio_test_uptodate(folio))) {
		ret = VM_FAULT_SIGBUS | VM_FAULT_LOCKED;
		goto out;
	}

	if (netfs_folio_group(folio) != netfs_group) {
		folio_unlock(folio);
		err = filemap_fdatawait_range(inode->i_mapping,
					      folio_pos(folio),
					      folio_pos(folio) + folio_size(folio));
		switch (err) {
		case 0:
			ret = VM_FAULT_RETRY;
			goto out;
		case -ENOMEM:
			ret = VM_FAULT_OOM;
			goto out;
		default:
			ret = VM_FAULT_SIGBUS;
			goto out;
		}
	}

	if (folio_test_dirty(folio))
		trace_netfs_folio(folio, netfs_folio_trace_mkwrite_plus);
	else
		trace_netfs_folio(folio, netfs_folio_trace_mkwrite);
	netfs_set_group(folio, netfs_group);
	file_update_time(file);
	ret = VM_FAULT_LOCKED;
out:
	sb_end_pagefault(inode->i_sb);
	return ret;
}
EXPORT_SYMBOL(netfs_page_mkwrite);

/*
 * Kill all the pages in the given range
 */
static void netfs_kill_pages(struct address_space *mapping,
			     loff_t start, loff_t len)
{
	struct folio *folio;
	pgoff_t index = start / PAGE_SIZE;
	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;

	_enter("%llx-%llx", start, start + len - 1);

	do {
		_debug("kill %lx (to %lx)", index, last);

		folio = filemap_get_folio(mapping, index);
		if (IS_ERR(folio)) {
			next = index + 1;
			continue;
		}

		next = folio_next_index(folio);

		trace_netfs_folio(folio, netfs_folio_trace_kill);
		folio_clear_uptodate(folio);
		if (folio_test_fscache(folio))
			folio_end_fscache(folio);
		folio_end_writeback(folio);
		folio_lock(folio);
		generic_error_remove_folio(mapping, folio);
		folio_unlock(folio);
		folio_put(folio);

	} while (index = next, index <= last);

	_leave("");
}

/*
 * Redirty all the pages in a given range.
 */
static void netfs_redirty_pages(struct address_space *mapping,
				loff_t start, loff_t len)
{
	struct folio *folio;
	pgoff_t index = start / PAGE_SIZE;
	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;

	_enter("%llx-%llx", start, start + len - 1);

	do {
		_debug("redirty %llx @%llx", len, start);

		folio = filemap_get_folio(mapping, index);
		if (IS_ERR(folio)) {
			next = index + 1;
			continue;
		}

		next = folio_next_index(folio);
		trace_netfs_folio(folio, netfs_folio_trace_redirty);
		filemap_dirty_folio(mapping, folio);
		if (folio_test_fscache(folio))
			folio_end_fscache(folio);
		folio_end_writeback(folio);
		folio_put(folio);
	} while (index = next, index <= last);

	balance_dirty_pages_ratelimited(mapping);

	_leave("");
}

/*
 * Completion of write to server
 */
static void netfs_pages_written_back(struct netfs_io_request *wreq)
{
	struct address_space *mapping = wreq->mapping;
	struct netfs_folio *finfo;
	struct netfs_group *group = NULL;
	struct folio *folio;
	pgoff_t last;
	int gcount = 0;

	XA_STATE(xas, &mapping->i_pages, wreq->start / PAGE_SIZE);

	_enter("%llx-%llx", wreq->start, wreq->start + wreq->len);

	rcu_read_lock();

	last = (wreq->start + wreq->len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, last) {
		WARN(!folio_test_writeback(folio),
		     "bad %zx @%llx page %lx %lx\n",
		     wreq->len, wreq->start, folio->index, last);

		if ((finfo = netfs_folio_info(folio))) {
			/* Streaming writes cannot be redirtied whilst under
			 * writeback, so discard the streaming record.
			 */
			folio_detach_private(folio);
			group = finfo->netfs_group;
			gcount++;
			trace_netfs_folio(folio, netfs_folio_trace_clear_s);
			kfree(finfo);
		} else if ((group = netfs_folio_group(folio))) {
			/* Need to detach the group pointer if the page didn't
			 * get redirtied.  If it has been redirtied, then it
			 * must be within the same group.
			 */
			if (folio_test_dirty(folio)) {
				trace_netfs_folio(folio, netfs_folio_trace_redirtied);
				goto end_wb;
			}
			if (folio_trylock(folio)) {
				if (!folio_test_dirty(folio)) {
					folio_detach_private(folio);
					gcount++;
					trace_netfs_folio(folio, netfs_folio_trace_clear_g);
				} else {
					trace_netfs_folio(folio, netfs_folio_trace_redirtied);
				}
				folio_unlock(folio);
				goto end_wb;
			}

			xas_pause(&xas);
			rcu_read_unlock();
			folio_lock(folio);
			if (!folio_test_dirty(folio)) {
				folio_detach_private(folio);
				gcount++;
				trace_netfs_folio(folio, netfs_folio_trace_clear_g);
			} else {
				trace_netfs_folio(folio, netfs_folio_trace_redirtied);
			}
			folio_unlock(folio);
			rcu_read_lock();
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_clear);
		}
	end_wb:
		if (folio_test_fscache(folio))
			folio_end_fscache(folio);
		xas_advance(&xas, folio_next_index(folio) - 1);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
	netfs_put_group_many(group, gcount);
	_leave("");
}

/*
 * Deal with the disposition of the folios that are under writeback to close
 * out the operation.
 */
static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq)
{
	struct address_space *mapping = wreq->mapping;

	_enter("");

	switch (wreq->error) {
	case 0:
		netfs_pages_written_back(wreq);
		break;

	default:
		pr_notice("R=%08x Unexpected error %d\n", wreq->debug_id, wreq->error);
		fallthrough;
	case -EACCES:
	case -EPERM:
	case -ENOKEY:
	case -EKEYEXPIRED:
	case -EKEYREJECTED:
	case -EKEYREVOKED:
	case -ENETRESET:
	case -EDQUOT:
	case -ENOSPC:
		netfs_redirty_pages(mapping, wreq->start, wreq->len);
		break;

	case -EROFS:
	case -EIO:
	case -EREMOTEIO:
	case -EFBIG:
	case -ENOENT:
	case -ENOMEDIUM:
	case -ENXIO:
		netfs_kill_pages(mapping, wreq->start, wreq->len);
		break;
	}

	if (wreq->error)
		mapping_set_error(mapping, wreq->error);
	if (wreq->netfs_ops->done)
		wreq->netfs_ops->done(wreq);
}

/*
 * Extend the region to be written back to include subsequent contiguously
 * dirty pages if possible, but don't sleep while doing so.
 *
 * If this page holds new content, then we can include filler zeros in the
 * writeback.
 */
static void netfs_extend_writeback(struct address_space *mapping,
				   struct netfs_group *group,
				   struct xa_state *xas,
				   long *_count,
				   loff_t start,
				   loff_t max_len,
				   bool caching,
				   size_t *_len,
				   size_t *_top)
{
	struct netfs_folio *finfo;
	struct folio_batch fbatch;
	struct folio *folio;
	unsigned int i;
	pgoff_t index = (start + *_len) / PAGE_SIZE;
	size_t len;
	void *priv;
	bool stop = true;

	folio_batch_init(&fbatch);

	do {
		/* Firstly, we gather up a batch of contiguous dirty pages
		 * under the RCU read lock - but we can't clear the dirty flags
		 * there if any of those pages are mapped.
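		 *
		 * The batch is then processed outside the RCU lock: each folio
		 * has its dirty flag cleared and writeback (and, if we're
		 * caching, the fscache mark) set before it is unlocked again.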
		 */
		rcu_read_lock();

		xas_for_each(xas, folio, ULONG_MAX) {
			stop = true;
			if (xas_retry(xas, folio))
				continue;
			if (xa_is_value(folio))
				break;
			if (folio->index != index) {
				xas_reset(xas);
				break;
			}

			if (!folio_try_get_rcu(folio)) {
				xas_reset(xas);
				continue;
			}

			/* Has the folio moved or been split? */
			if (unlikely(folio != xas_reload(xas))) {
				folio_put(folio);
				xas_reset(xas);
				break;
			}

			if (!folio_trylock(folio)) {
				folio_put(folio);
				xas_reset(xas);
				break;
			}
			if (!folio_test_dirty(folio) ||
			    folio_test_writeback(folio) ||
			    folio_test_fscache(folio)) {
				folio_unlock(folio);
				folio_put(folio);
				xas_reset(xas);
				break;
			}

			stop = false;
			len = folio_size(folio);
			priv = folio_get_private(folio);
			if ((const struct netfs_group *)priv != group) {
				stop = true;
				finfo = netfs_folio_info(folio);
				if (finfo->netfs_group != group ||
				    finfo->dirty_offset > 0) {
					folio_unlock(folio);
					folio_put(folio);
					xas_reset(xas);
					break;
				}
				len = finfo->dirty_len;
			}

			*_top += folio_size(folio);
			index += folio_nr_pages(folio);
			*_count -= folio_nr_pages(folio);
			*_len += len;
			if (*_len >= max_len || *_count <= 0)
				stop = true;

			if (!folio_batch_add(&fbatch, folio))
				break;
			if (stop)
				break;
		}

		xas_pause(xas);
		rcu_read_unlock();

		/* Now, if we obtained any folios, we can shift them to being
		 * writable and mark them for caching.
		 */
		if (!folio_batch_count(&fbatch))
			break;

		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			folio = fbatch.folios[i];
			trace_netfs_folio(folio, netfs_folio_trace_store_plus);

			if (!folio_clear_dirty_for_io(folio))
				BUG();
			folio_start_writeback(folio);
			netfs_folio_start_fscache(caching, folio);
			folio_unlock(folio);
		}

		folio_batch_release(&fbatch);
		cond_resched();
	} while (!stop);
}

/*
 * Synchronously write back the locked page and any subsequent non-locked dirty
 * pages.
 */
static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,
						  struct writeback_control *wbc,
						  struct netfs_group *group,
						  struct xa_state *xas,
						  struct folio *folio,
						  unsigned long long start,
						  unsigned long long end)
{
	struct netfs_io_request *wreq;
	struct netfs_folio *finfo;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long i_size = i_size_read(&ctx->inode);
	size_t len, max_len;
	bool caching = netfs_is_cache_enabled(ctx);
	long count = wbc->nr_to_write;
	int ret;

	_enter(",%lx,%llx-%llx,%u", folio->index, start, end, caching);

	wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio),
				   NETFS_WRITEBACK);
	if (IS_ERR(wreq)) {
		folio_unlock(folio);
		return PTR_ERR(wreq);
	}

	if (!folio_clear_dirty_for_io(folio))
		BUG();
	folio_start_writeback(folio);
	netfs_folio_start_fscache(caching, folio);

	count -= folio_nr_pages(folio);

	/* Find all consecutive lockable dirty pages that have contiguous
	 * written regions, stopping when we find a page that is not
	 * immediately lockable, is not dirty or is missing, or we reach the
	 * end of the range.
	 */
	trace_netfs_folio(folio, netfs_folio_trace_store);

	len = wreq->len;
	finfo = netfs_folio_info(folio);
	if (finfo) {
		start += finfo->dirty_offset;
		if (finfo->dirty_offset + finfo->dirty_len != len) {
			len = finfo->dirty_len;
			goto cant_expand;
		}
		len = finfo->dirty_len;
	}

	if (start < i_size) {
		/* Trim the write to the EOF; the extra data is ignored.  Also
		 * put an upper limit on the size of a single storedata op.
		 */
		max_len = 65536 * 4096;
		max_len = min_t(unsigned long long, max_len, end - start + 1);
		max_len = min_t(unsigned long long, max_len, i_size - start);

		if (len < max_len)
			netfs_extend_writeback(mapping, group, xas, &count, start,
					       max_len, caching, &len, &wreq->upper_len);
	}

cant_expand:
	len = min_t(unsigned long long, len, i_size - start);

	/* We now have a contiguous set of dirty pages, each with writeback
	 * set; the first page is still locked at this point, but all the rest
	 * have been unlocked.
	 */
	folio_unlock(folio);
	wreq->start = start;
	wreq->len = len;

	if (start < i_size) {
		_debug("write back %zx @%llx [%llx]", len, start, i_size);

		/* Speculatively write to the cache.  We have to fix this up
		 * later if the store fails.
		 */
		wreq->cleanup = netfs_cleanup_buffered_write;

		iov_iter_xarray(&wreq->iter, ITER_SOURCE, &mapping->i_pages, start,
				wreq->upper_len);
		__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
		ret = netfs_begin_write(wreq, true, netfs_write_trace_writeback);
		if (ret == 0 || ret == -EIOCBQUEUED)
			wbc->nr_to_write -= len / PAGE_SIZE;
	} else {
		_debug("write discard %zx @%llx [%llx]", len, start, i_size);

		/* The dirty region was entirely beyond the EOF. */
		fscache_clear_page_bits(mapping, start, len, caching);
		netfs_pages_written_back(wreq);
		ret = 0;
	}

	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
	_leave(" = 1");
	return 1;
}

/*
 * Write a region of pages back to the server
 */
static ssize_t netfs_writepages_begin(struct address_space *mapping,
				      struct writeback_control *wbc,
				      struct netfs_group *group,
				      struct xa_state *xas,
				      unsigned long long *_start,
				      unsigned long long end)
{
	const struct netfs_folio *finfo;
	struct folio *folio;
	unsigned long long start = *_start;
	ssize_t ret;
	void *priv;
	int skips = 0;

	_enter("%llx,%llx,", start, end);

search_again:
	/* Find the first dirty page in the group. */
	rcu_read_lock();

	for (;;) {
		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
		if (xas_retry(xas, folio) || xa_is_value(folio))
			continue;
		if (!folio)
			break;

		if (!folio_try_get_rcu(folio)) {
			xas_reset(xas);
			continue;
		}

		if (unlikely(folio != xas_reload(xas))) {
			folio_put(folio);
			xas_reset(xas);
			continue;
		}

		/* Skip any dirty folio that's not in the group of interest.
		 */
		priv = folio_get_private(folio);
		if ((const struct netfs_group *)priv != group) {
			finfo = netfs_folio_info(folio);
			if (finfo->netfs_group != group) {
				folio_put(folio);
				continue;
			}
		}

		xas_pause(xas);
		break;
	}
	rcu_read_unlock();
	if (!folio)
		return 0;

	start = folio_pos(folio); /* May regress with THPs */

	_debug("wback %lx", folio->index);

	/* At this point we hold neither the i_pages lock nor the page lock:
	 * the page may be truncated or invalidated (changing page->mapping to
	 * NULL), or even swizzled back from swapper_space to tmpfs file
	 * mapping
	 */
lock_again:
	if (wbc->sync_mode != WB_SYNC_NONE) {
		ret = folio_lock_killable(folio);
		if (ret < 0)
			return ret;
	} else {
		if (!folio_trylock(folio))
			goto search_again;
	}

	if (folio->mapping != mapping ||
	    !folio_test_dirty(folio)) {
		start += folio_size(folio);
		folio_unlock(folio);
		goto search_again;
	}

	if (folio_test_writeback(folio) ||
	    folio_test_fscache(folio)) {
		folio_unlock(folio);
		if (wbc->sync_mode != WB_SYNC_NONE) {
			folio_wait_writeback(folio);
#ifdef CONFIG_FSCACHE
			folio_wait_fscache(folio);
#endif
			goto lock_again;
		}

		start += folio_size(folio);
		if (wbc->sync_mode == WB_SYNC_NONE) {
			if (skips >= 5 || need_resched()) {
				ret = 0;
				goto out;
			}
			skips++;
		}
		goto search_again;
	}

	ret = netfs_write_back_from_locked_folio(mapping, wbc, group, xas,
						 folio, start, end);
out:
	if (ret > 0)
		*_start = start + ret;
	_leave(" = %zd [%llx]", ret, *_start);
	return ret;
}

/*
 * Write a region of pages back to the server
 */
static int netfs_writepages_region(struct address_space *mapping,
				   struct writeback_control *wbc,
				   struct netfs_group *group,
				   unsigned long long *_start,
				   unsigned long long end)
{
	ssize_t ret;

	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);

	do {
		ret = netfs_writepages_begin(mapping, wbc, group, &xas,
					     _start, end);
		if (ret > 0 && wbc->nr_to_write > 0)
			cond_resched();
	} while (ret > 0 && wbc->nr_to_write > 0);

	return ret > 0 ? 0 : ret;
}

/*
 * write some of the pending data back to the server
 */
int netfs_writepages(struct address_space *mapping,
		     struct writeback_control *wbc)
{
	struct netfs_group *group = NULL;
	loff_t start, end;
	int ret;

	_enter("");

	/* We have to be careful as we can end up racing with setattr()
	 * truncating the pagecache since the caller doesn't take a lock here
	 * to prevent it.
	 */

	if (wbc->range_cyclic && mapping->writeback_index) {
		start = mapping->writeback_index * PAGE_SIZE;
		ret = netfs_writepages_region(mapping, wbc, group,
					      &start, LLONG_MAX);
		if (ret < 0)
			goto out;

		if (wbc->nr_to_write <= 0) {
			mapping->writeback_index = start / PAGE_SIZE;
			goto out;
		}

		start = 0;
		end = mapping->writeback_index * PAGE_SIZE;
		mapping->writeback_index = 0;
		ret = netfs_writepages_region(mapping, wbc, group, &start, end);
		if (ret == 0)
			mapping->writeback_index = start / PAGE_SIZE;
	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
		start = 0;
		ret = netfs_writepages_region(mapping, wbc, group,
					      &start, LLONG_MAX);
		if (wbc->nr_to_write > 0 && ret == 0)
			mapping->writeback_index = start / PAGE_SIZE;
	} else {
		start = wbc->range_start;
		ret = netfs_writepages_region(mapping, wbc, group,
					      &start, wbc->range_end);
	}

out:
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_writepages);

/*
 * Deal with the disposition of a laundered folio.
 */
static void netfs_cleanup_launder_folio(struct netfs_io_request *wreq)
{
	if (wreq->error) {
		pr_notice("R=%08x Laundering error %d\n", wreq->debug_id, wreq->error);
		mapping_set_error(wreq->mapping, wreq->error);
	}
}

/**
 * netfs_launder_folio - Clean up a dirty folio that's being invalidated
 * @folio: The folio to clean
 *
 * This is called to write back a folio that's being invalidated when an inode
 * is getting torn down.  Ideally, writepages would be used instead.
 */
int netfs_launder_folio(struct folio *folio)
{
	struct netfs_io_request *wreq;
	struct address_space *mapping = folio->mapping;
	struct netfs_folio *finfo = netfs_folio_info(folio);
	struct netfs_group *group = netfs_folio_group(folio);
	struct bio_vec bvec;
	unsigned long long i_size = i_size_read(mapping->host);
	unsigned long long start = folio_pos(folio);
	size_t offset = 0, len;
	int ret = 0;

	if (finfo) {
		offset = finfo->dirty_offset;
		start += offset;
		len = finfo->dirty_len;
	} else {
		len = folio_size(folio);
	}
	len = min_t(unsigned long long, len, i_size - start);

	wreq = netfs_alloc_request(mapping, NULL, start, len, NETFS_LAUNDER_WRITE);
	if (IS_ERR(wreq)) {
		ret = PTR_ERR(wreq);
		goto out;
	}

	if (!folio_clear_dirty_for_io(folio))
		goto out_put;

	trace_netfs_folio(folio, netfs_folio_trace_launder);

	_debug("launder %llx-%llx", start, start + len - 1);

	/* Speculatively write to the cache.  We have to fix this up later if
	 * the store fails.
	 */
	wreq->cleanup = netfs_cleanup_launder_folio;

	bvec_set_folio(&bvec, folio, len, offset);
	iov_iter_bvec(&wreq->iter, ITER_SOURCE, &bvec, 1, len);
	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
	ret = netfs_begin_write(wreq, true, netfs_write_trace_launder);

out_put:
	folio_detach_private(folio);
	netfs_put_group(group);
	kfree(finfo);
	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
out:
	folio_wait_fscache(folio);
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_launder_folio);
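
/*
 * Usage sketch (illustrative only, not compiled here): a network filesystem
 * typically wires the helpers exported above into its file, address_space and
 * vm operations along the following lines.  The "myfs_" names are
 * hypothetical; see the in-tree netfs users (eg. 9p and afs) for real
 * examples.
 *
 *	static vm_fault_t myfs_page_mkwrite(struct vm_fault *vmf)
 *	{
 *		return netfs_page_mkwrite(vmf, NULL);
 *	}
 *
 *	const struct file_operations myfs_file_ops = {
 *		...
 *		.write_iter	= netfs_file_write_iter,
 *	};
 *
 *	const struct address_space_operations myfs_aops = {
 *		...
 *		.writepages	= netfs_writepages,
 *		.launder_folio	= netfs_launder_folio,
 *	};
 */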