// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level write support.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/pagevec.h>
#include "internal.h"

/*
 * Determined write method.  Adjust netfs_folio_traces if this is changed.
 */
enum netfs_how_to_modify {
	NETFS_FOLIO_IS_UPTODATE,	/* Folio is uptodate already */
	NETFS_JUST_PREFETCH,		/* We have to read the folio anyway */
	NETFS_WHOLE_FOLIO_MODIFY,	/* We're going to overwrite the whole folio */
	NETFS_MODIFY_AND_CLEAR,		/* We can assume there is no data to be downloaded. */
	NETFS_STREAMING_WRITE,		/* Store incomplete data in non-uptodate page. */
	NETFS_STREAMING_WRITE_CONT,	/* Continue streaming write. */
	NETFS_FLUSH_CONTENT,		/* Flush incompatible content. */
};

static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq);

static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
{
	if (netfs_group && !folio_get_private(folio))
		folio_attach_private(folio, netfs_get_group(netfs_group));
}

#if IS_ENABLED(CONFIG_FSCACHE)
static void netfs_folio_start_fscache(bool caching, struct folio *folio)
{
	if (caching)
		folio_start_fscache(folio);
}
#else
static void netfs_folio_start_fscache(bool caching, struct folio *folio)
{
}
#endif

/*
 * Decide how we should modify a folio.  We might be attempting to do
 * write-streaming, in which case we don't want to do a local RMW cycle if we
 * can avoid it.  If we're doing local caching or content crypto, we award
 * that priority over avoiding RMW.  If the file is open readably, then we
 * also assume that we may want to read what we wrote.
 */
static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
						    struct file *file,
						    struct folio *folio,
						    void *netfs_group,
						    size_t flen,
						    size_t offset,
						    size_t len,
						    bool maybe_trouble)
{
	struct netfs_folio *finfo = netfs_folio_info(folio);
	loff_t pos = folio_file_pos(folio);

	_enter("");

	if (netfs_folio_group(folio) != netfs_group)
		return NETFS_FLUSH_CONTENT;

	if (folio_test_uptodate(folio))
		return NETFS_FOLIO_IS_UPTODATE;

	if (pos >= ctx->zero_point)
		return NETFS_MODIFY_AND_CLEAR;

	if (!maybe_trouble && offset == 0 && len >= flen)
		return NETFS_WHOLE_FOLIO_MODIFY;

	if (file->f_mode & FMODE_READ)
		goto no_write_streaming;
	if (test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags))
		goto no_write_streaming;

	if (netfs_is_cache_enabled(ctx)) {
		/* We don't want to get a streaming write on a file that loses
		 * caching service temporarily because the backing store got
		 * culled.
		 */
		if (!test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags))
			set_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags);
		goto no_write_streaming;
	}

	if (!finfo)
		return NETFS_STREAMING_WRITE;

	/* We can continue a streaming write only if it continues on from the
	 * previous.  If it overlaps, we must flush lest we suffer a partial
	 * copy and disjoint dirty regions.
	 */
	if (offset == finfo->dirty_offset + finfo->dirty_len)
		return NETFS_STREAMING_WRITE_CONT;
	return NETFS_FLUSH_CONTENT;

no_write_streaming:
	if (finfo) {
		netfs_stat(&netfs_n_wh_wstream_conflict);
		return NETFS_FLUSH_CONTENT;
	}
	return NETFS_JUST_PREFETCH;
}

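/* Worked example for netfs_how_to_modify() (illustrative): a write into a
 * folio that starts at or beyond ctx->zero_point has nothing that would need
 * to be downloaded, so the unmodified parts of the folio can simply be
 * cleared (NETFS_MODIFY_AND_CLEAR); whereas a small write into a non-uptodate
 * folio of a file that is also open for reading falls back to reading the
 * folio in first (NETFS_JUST_PREFETCH).
 */
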
/*
 * Grab a folio for writing and lock it.  Attempt to allocate as large a folio
 * as possible to hold as much of the remaining length as possible in one go.
 */
static struct folio *netfs_grab_folio_for_write(struct address_space *mapping,
						loff_t pos, size_t part)
{
	pgoff_t index = pos / PAGE_SIZE;
	fgf_t fgp_flags = FGP_WRITEBEGIN;

	if (mapping_large_folio_support(mapping))
		fgp_flags |= fgf_set_order(pos % PAGE_SIZE + part);

	return __filemap_get_folio(mapping, index, fgp_flags,
				   mapping_gfp_mask(mapping));
}

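/* Illustrative note: with large-folio support, a long write (say a few
 * hundred KiB) asks __filemap_get_folio() for a correspondingly high folio
 * order, but the folio actually returned may well be smaller, down to a
 * single page.  The write loop below therefore re-clamps the copy length to
 * the size of the folio it really obtained.
 */
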
/**
 * netfs_perform_write - Copy data into the pagecache.
 * @iocb: The operation parameters
 * @iter: The source buffer
 * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
 *
 * Copy data into pagecache pages attached to the inode specified by @iocb.
 * The caller must hold appropriate inode locks.
 *
 * Dirty pages are tagged with a netfs_folio struct if they're not up to date
 * to indicate the range modified.  Dirty pages may also be tagged with a
 * netfs-specific grouping such that data from an old group gets flushed before
 * a new one is started.
 */
ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
			    struct netfs_group *netfs_group)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct address_space *mapping = inode->i_mapping;
	struct netfs_inode *ctx = netfs_inode(inode);
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_NONE,
		.for_sync	= true,
		.nr_to_write	= LONG_MAX,
		.range_start	= iocb->ki_pos,
		.range_end	= iocb->ki_pos + iter->count,
	};
	struct netfs_io_request *wreq = NULL;
	struct netfs_folio *finfo;
	struct folio *folio;
	enum netfs_how_to_modify howto;
	enum netfs_folio_trace trace;
	unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0 : BDP_ASYNC;
	ssize_t written = 0, ret, ret2;
	loff_t i_size, pos = iocb->ki_pos, from, to;
	size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
	bool maybe_trouble = false;

	if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) ||
		     iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))
	    ) {
		wbc_attach_fdatawrite_inode(&wbc, mapping->host);

		ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count);
		if (ret < 0) {
			wbc_detach_inode(&wbc);
			goto out;
		}

		wreq = netfs_begin_writethrough(iocb, iter->count);
		if (IS_ERR(wreq)) {
			wbc_detach_inode(&wbc);
			ret = PTR_ERR(wreq);
			wreq = NULL;
			goto out;
		}
		if (!is_sync_kiocb(iocb))
			wreq->iocb = iocb;
		wreq->cleanup = netfs_cleanup_buffered_write;
	}

	do {
		size_t flen;
		size_t offset;	/* Offset into pagecache folio */
		size_t part;	/* Bytes to write to folio */
		size_t copied;	/* Bytes copied from user */

		ret = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags);
		if (unlikely(ret < 0))
			break;

		offset = pos & (max_chunk - 1);
		part = min(max_chunk - offset, iov_iter_count(iter));

		/* Bring in the user pages that we will copy from _first_ lest
		 * we hit a nasty deadlock on copying from the same page as
		 * we're writing to, without it being marked uptodate.
		 *
		 * Not only is this an optimisation, but it is also required to
		 * check that the address is actually valid, when atomic
		 * usercopies are used below.
		 *
		 * We rely on the page being held onto long enough by the LRU
		 * that we can grab it below if this causes it to be read.
		 */
		ret = -EFAULT;
		if (unlikely(fault_in_iov_iter_readable(iter, part) == part))
			break;

		folio = netfs_grab_folio_for_write(mapping, pos, part);
		if (IS_ERR(folio)) {
			ret = PTR_ERR(folio);
			break;
		}

		flen = folio_size(folio);
		offset = pos & (flen - 1);
		part = min_t(size_t, flen - offset, part);

		if (signal_pending(current)) {
			ret = written ? -EINTR : -ERESTARTSYS;
			goto error_folio_unlock;
		}

		/* See if we need to prefetch the area we're going to modify.
		 * We need to do this before we get a lock on the folio in case
		 * there's more than one writer competing for the same cache
		 * block.
		 */
		howto = netfs_how_to_modify(ctx, file, folio, netfs_group,
					    flen, offset, part, maybe_trouble);
		_debug("howto %u", howto);
		switch (howto) {
		case NETFS_JUST_PREFETCH:
			ret = netfs_prefetch_for_write(file, folio, offset, part);
			if (ret < 0) {
				_debug("prefetch = %zd", ret);
				goto error_folio_unlock;
			}
			break;
		case NETFS_FOLIO_IS_UPTODATE:
		case NETFS_WHOLE_FOLIO_MODIFY:
		case NETFS_STREAMING_WRITE_CONT:
			break;
		case NETFS_MODIFY_AND_CLEAR:
			zero_user_segment(&folio->page, 0, offset);
			break;
		case NETFS_STREAMING_WRITE:
			ret = -EIO;
			if (WARN_ON(folio_get_private(folio)))
				goto error_folio_unlock;
			break;
		case NETFS_FLUSH_CONTENT:
			trace_netfs_folio(folio, netfs_flush_content);
			from = folio_pos(folio);
			to = from + folio_size(folio) - 1;
			folio_unlock(folio);
			folio_put(folio);
			ret = filemap_write_and_wait_range(mapping, from, to);
			if (ret < 0)
				goto out;
			continue;
		}

		if (mapping_writably_mapped(mapping))
			flush_dcache_folio(folio);

		copied = copy_folio_from_iter_atomic(folio, offset, part, iter);

		flush_dcache_folio(folio);

		/* Deal with a (partially) failed copy */
		if (copied == 0) {
			ret = -EFAULT;
			goto error_folio_unlock;
		}

		trace = (enum netfs_folio_trace)howto;
		switch (howto) {
		case NETFS_FOLIO_IS_UPTODATE:
		case NETFS_JUST_PREFETCH:
			netfs_set_group(folio, netfs_group);
			break;
		case NETFS_MODIFY_AND_CLEAR:
			zero_user_segment(&folio->page, offset + copied, flen);
			netfs_set_group(folio, netfs_group);
			folio_mark_uptodate(folio);
			break;
		case NETFS_WHOLE_FOLIO_MODIFY:
			if (unlikely(copied < part)) {
				maybe_trouble = true;
				iov_iter_revert(iter, copied);
				copied = 0;
				goto retry;
			}
			netfs_set_group(folio, netfs_group);
			folio_mark_uptodate(folio);
			break;
		case NETFS_STREAMING_WRITE:
			if (offset == 0 && copied == flen) {
				netfs_set_group(folio, netfs_group);
				folio_mark_uptodate(folio);
				trace = netfs_streaming_filled_page;
				break;
			}
			finfo = kzalloc(sizeof(*finfo), GFP_KERNEL);
			if (!finfo) {
				iov_iter_revert(iter, copied);
				ret = -ENOMEM;
				goto error_folio_unlock;
			}
			finfo->netfs_group = netfs_get_group(netfs_group);
			finfo->dirty_offset = offset;
			finfo->dirty_len = copied;
			folio_attach_private(folio, (void *)((unsigned long)finfo |
							     NETFS_FOLIO_INFO));
			break;
		case NETFS_STREAMING_WRITE_CONT:
			finfo = netfs_folio_info(folio);
			finfo->dirty_len += copied;
			if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) {
				if (finfo->netfs_group)
					folio_change_private(folio, finfo->netfs_group);
				else
					folio_detach_private(folio);
				folio_mark_uptodate(folio);
				kfree(finfo);
				trace = netfs_streaming_cont_filled_page;
			}
			break;
		default:
			WARN(true, "Unexpected modify type %u ix=%lx\n",
			     howto, folio->index);
			ret = -EIO;
			goto error_folio_unlock;
		}

		trace_netfs_folio(folio, trace);

		/* Update the inode size if we moved the EOF marker */
		i_size = i_size_read(inode);
		pos += copied;
		if (pos > i_size) {
			if (ctx->ops->update_i_size) {
				ctx->ops->update_i_size(inode, pos);
			} else {
				i_size_write(inode, pos);
#if IS_ENABLED(CONFIG_FSCACHE)
				fscache_update_cookie(ctx->cache, NULL, &pos);
#endif
			}
		}
		written += copied;

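		/* If we're not doing writethrough, just leave the folio dirty
		 * for writeback to pick up later; otherwise mark it for
		 * writeback (and caching) now and feed what we just copied
		 * into the in-flight writethrough request.
		 */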
		if (likely(!wreq)) {
			folio_mark_dirty(folio);
		} else {
			if (folio_test_dirty(folio))
				/* Sigh.  mmap. */
				folio_clear_dirty_for_io(folio);
			/* We make multiple writes to the folio... */
			if (!folio_test_writeback(folio)) {
				folio_wait_fscache(folio);
				folio_start_writeback(folio);
				folio_start_fscache(folio);
				if (wreq->iter.count == 0)
					trace_netfs_folio(folio, netfs_folio_trace_wthru);
				else
					trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
			}
			netfs_advance_writethrough(wreq, copied,
						   offset + copied == flen);
		}
	retry:
		folio_unlock(folio);
		folio_put(folio);
		folio = NULL;

		cond_resched();
	} while (iov_iter_count(iter));

out:
	if (unlikely(wreq)) {
		ret2 = netfs_end_writethrough(wreq, iocb);
		wbc_detach_inode(&wbc);
		if (ret2 == -EIOCBQUEUED)
			return ret2;
		if (ret == 0)
			ret = ret2;
	}

	iocb->ki_pos += written;
	_leave(" = %zd [%zd]", written, ret);
	return written ? written : ret;

error_folio_unlock:
	folio_unlock(folio);
	folio_put(folio);
	goto out;
}
EXPORT_SYMBOL(netfs_perform_write);

/**
 * netfs_buffered_write_iter_locked - write data to a file
 * @iocb: IO state structure (file, offset, etc.)
 * @from: iov_iter with data to write
 * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
 *
 * This function does all the work needed for actually writing data to a
 * file. It does all basic checks, removes SUID from the file, updates
 * modification times and calls proper subroutines depending on whether we
 * do direct IO or a standard buffered write.
 *
 * The caller must hold appropriate locks around this function and have called
 * generic_write_checks() already.  The caller is also responsible for doing
 * any necessary syncing afterwards.
 *
 * This function does *not* take care of syncing data in case of O_SYNC write.
 * A caller has to handle it. This is mainly due to the fact that we want to
 * avoid syncing under i_rwsem.
 *
 * Return:
 * * number of bytes written, even for truncated writes
 * * negative error code if no data has been written at all
 */
ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
					 struct netfs_group *netfs_group)
{
	struct file *file = iocb->ki_filp;
	ssize_t ret;

	trace_netfs_write_iter(iocb, from);

	ret = file_remove_privs(file);
	if (ret)
		return ret;

	ret = file_update_time(file);
	if (ret)
		return ret;

	return netfs_perform_write(iocb, from, netfs_group);
}
EXPORT_SYMBOL(netfs_buffered_write_iter_locked);

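/* Illustrative sketch (not part of this file): a filesystem that needs to do
 * its own locking around buffered writes would typically call the above from
 * its ->write_iter() method along these lines (error handling elided, names
 * hypothetical):
 *
 *	inode_lock(inode);
 *	ret = generic_write_checks(iocb, from);
 *	if (ret > 0)
 *		ret = netfs_buffered_write_iter_locked(iocb, from, my_group);
 *	inode_unlock(inode);
 *	if (ret > 0)
 *		ret = generic_write_sync(iocb, ret);
 *
 * Filesystems without such requirements can just use netfs_file_write_iter()
 * below, which performs these steps itself.
 */
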
/**
 * netfs_file_write_iter - write data to a file
 * @iocb: IO state structure
 * @from: iov_iter with data to write
 *
 * Perform a write to a file, writing into the pagecache if possible and doing
 * an unbuffered write instead if not.
 *
 * Return:
 * * Negative error code if no data has been written at all or if
 *   vfs_fsync_range() failed for a synchronous write
 * * Number of bytes written, even for truncated writes
 */
ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct netfs_inode *ictx = netfs_inode(inode);
	ssize_t ret;

	_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));

	if (!iov_iter_count(from))
		return 0;

	if ((iocb->ki_flags & IOCB_DIRECT) ||
	    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
		return netfs_unbuffered_write_iter(iocb, from);

	ret = netfs_start_io_write(inode);
	if (ret < 0)
		return ret;

	ret = generic_write_checks(iocb, from);
	if (ret > 0)
		ret = netfs_buffered_write_iter_locked(iocb, from, NULL);
	netfs_end_io_write(inode);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	return ret;
}
EXPORT_SYMBOL(netfs_file_write_iter);

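/* Illustrative sketch (not part of this file): a network filesystem using
 * these helpers would typically wire them into its method tables, e.g.
 * (structure and function names hypothetical):
 *
 *	static const struct file_operations example_file_ops = {
 *		.write_iter	= netfs_file_write_iter,
 *		...
 *	};
 *
 * and have its vm_operations_struct ->page_mkwrite handler call
 * netfs_page_mkwrite() below, passing its own dirty-folio group (or NULL).
 */
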
/*
 * Notification that a previously read-only page is about to become writable.
 * Note that the caller indicates a single page of a multipage folio.
 */
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group)
{
	struct folio *folio = page_folio(vmf->page);
	struct file *file = vmf->vma->vm_file;
	struct inode *inode = file_inode(file);
	vm_fault_t ret = VM_FAULT_RETRY;
	int err;

	_enter("%lx", folio->index);

	sb_start_pagefault(inode->i_sb);

	if (folio_wait_writeback_killable(folio))
		goto out;

	if (folio_lock_killable(folio) < 0)
		goto out;

	/* Can we see a streaming write here? */
	if (WARN_ON(!folio_test_uptodate(folio))) {
		ret = VM_FAULT_SIGBUS | VM_FAULT_LOCKED;
		goto out;
	}

	if (netfs_folio_group(folio) != netfs_group) {
		folio_unlock(folio);
		err = filemap_fdatawait_range(inode->i_mapping,
					      folio_pos(folio),
					      folio_pos(folio) + folio_size(folio));
		switch (err) {
		case 0:
			ret = VM_FAULT_RETRY;
			goto out;
		case -ENOMEM:
			ret = VM_FAULT_OOM;
			goto out;
		default:
			ret = VM_FAULT_SIGBUS;
			goto out;
		}
	}

	if (folio_test_dirty(folio))
		trace_netfs_folio(folio, netfs_folio_trace_mkwrite_plus);
	else
		trace_netfs_folio(folio, netfs_folio_trace_mkwrite);
	netfs_set_group(folio, netfs_group);
	file_update_time(file);
	ret = VM_FAULT_LOCKED;
out:
	sb_end_pagefault(inode->i_sb);
	return ret;
}
EXPORT_SYMBOL(netfs_page_mkwrite);

/*
 * Kill all the pages in the given range
 */
static void netfs_kill_pages(struct address_space *mapping,
			     loff_t start, loff_t len)
{
	struct folio *folio;
	pgoff_t index = start / PAGE_SIZE;
	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;

	_enter("%llx-%llx", start, start + len - 1);

	do {
		_debug("kill %lx (to %lx)", index, last);

		folio = filemap_get_folio(mapping, index);
		if (IS_ERR(folio)) {
			next = index + 1;
			continue;
		}

		next = folio_next_index(folio);

		trace_netfs_folio(folio, netfs_folio_trace_kill);
		folio_clear_uptodate(folio);
		if (folio_test_fscache(folio))
			folio_end_fscache(folio);
		folio_end_writeback(folio);
		folio_lock(folio);
		generic_error_remove_folio(mapping, folio);
		folio_unlock(folio);
		folio_put(folio);

	} while (index = next, index <= last);

	_leave("");
}

/*
 * Redirty all the pages in a given range.
 */
static void netfs_redirty_pages(struct address_space *mapping,
				loff_t start, loff_t len)
{
	struct folio *folio;
	pgoff_t index = start / PAGE_SIZE;
	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;

	_enter("%llx-%llx", start, start + len - 1);

	do {
		_debug("redirty %llx @%llx", len, start);

		folio = filemap_get_folio(mapping, index);
		if (IS_ERR(folio)) {
			next = index + 1;
			continue;
		}

		next = folio_next_index(folio);
		trace_netfs_folio(folio, netfs_folio_trace_redirty);
		filemap_dirty_folio(mapping, folio);
		if (folio_test_fscache(folio))
			folio_end_fscache(folio);
		folio_end_writeback(folio);
		folio_put(folio);
	} while (index = next, index <= last);

	balance_dirty_pages_ratelimited(mapping);

	_leave("");
}

/*
 * Completion of write to server
 */
static void netfs_pages_written_back(struct netfs_io_request *wreq)
{
	struct address_space *mapping = wreq->mapping;
	struct netfs_folio *finfo;
	struct netfs_group *group = NULL;
	struct folio *folio;
	pgoff_t last;
	int gcount = 0;

	XA_STATE(xas, &mapping->i_pages, wreq->start / PAGE_SIZE);

	_enter("%llx-%llx", wreq->start, wreq->start + wreq->len);

	rcu_read_lock();

	last = (wreq->start + wreq->len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, last) {
		WARN(!folio_test_writeback(folio),
		     "bad %zx @%llx page %lx %lx\n",
		     wreq->len, wreq->start, folio->index, last);

		if ((finfo = netfs_folio_info(folio))) {
			/* Streaming writes cannot be redirtied whilst under
			 * writeback, so discard the streaming record.
			 */
			folio_detach_private(folio);
			group = finfo->netfs_group;
			gcount++;
			trace_netfs_folio(folio, netfs_folio_trace_clear_s);
			kfree(finfo);
		} else if ((group = netfs_folio_group(folio))) {
			/* Need to detach the group pointer if the page didn't
			 * get redirtied.  If it has been redirtied, then it
			 * must be within the same group.
			 */
			if (folio_test_dirty(folio)) {
				trace_netfs_folio(folio, netfs_folio_trace_redirtied);
				goto end_wb;
			}
			if (folio_trylock(folio)) {
				if (!folio_test_dirty(folio)) {
					folio_detach_private(folio);
					gcount++;
					trace_netfs_folio(folio, netfs_folio_trace_clear_g);
				} else {
					trace_netfs_folio(folio, netfs_folio_trace_redirtied);
				}
				folio_unlock(folio);
				goto end_wb;
			}

			xas_pause(&xas);
			rcu_read_unlock();
			folio_lock(folio);
			if (!folio_test_dirty(folio)) {
				folio_detach_private(folio);
				gcount++;
				trace_netfs_folio(folio, netfs_folio_trace_clear_g);
			} else {
				trace_netfs_folio(folio, netfs_folio_trace_redirtied);
			}
			folio_unlock(folio);
			rcu_read_lock();
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_clear);
		}
	end_wb:
		if (folio_test_fscache(folio))
			folio_end_fscache(folio);
		xas_advance(&xas, folio_next_index(folio) - 1);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
	netfs_put_group_many(group, gcount);
	_leave("");
}

/*
 * Deal with the disposition of the folios that are under writeback to close
 * out the operation.
 */
static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq)
{
	struct address_space *mapping = wreq->mapping;

	_enter("");

	switch (wreq->error) {
	case 0:
		netfs_pages_written_back(wreq);
		break;

	default:
		pr_notice("R=%08x Unexpected error %d\n", wreq->debug_id, wreq->error);
		fallthrough;
	case -EACCES:
	case -EPERM:
	case -ENOKEY:
	case -EKEYEXPIRED:
	case -EKEYREJECTED:
	case -EKEYREVOKED:
	case -ENETRESET:
	case -EDQUOT:
	case -ENOSPC:
		netfs_redirty_pages(mapping, wreq->start, wreq->len);
		break;

	case -EROFS:
	case -EIO:
	case -EREMOTEIO:
	case -EFBIG:
	case -ENOENT:
	case -ENOMEDIUM:
	case -ENXIO:
		netfs_kill_pages(mapping, wreq->start, wreq->len);
		break;
	}

	if (wreq->error)
		mapping_set_error(mapping, wreq->error);
	if (wreq->netfs_ops->done)
		wreq->netfs_ops->done(wreq);
}

/*
 * Extend the region to be written back to include subsequent contiguously
 * dirty pages if possible, but don't sleep while doing so.
 *
 * If this page holds new content, then we can include filler zeros in the
 * writeback.
 */
static void netfs_extend_writeback(struct address_space *mapping,
				   struct netfs_group *group,
				   struct xa_state *xas,
				   long *_count,
				   loff_t start,
				   loff_t max_len,
				   bool caching,
				   size_t *_len,
				   size_t *_top)
{
	struct netfs_folio *finfo;
	struct folio_batch fbatch;
	struct folio *folio;
	unsigned int i;
	pgoff_t index = (start + *_len) / PAGE_SIZE;
	size_t len;
	void *priv;
	bool stop = true;

	folio_batch_init(&fbatch);

	do {
		/* Firstly, we gather up a batch of contiguous dirty pages
		 * under the RCU read lock - but we can't clear the dirty flags
		 * there if any of those pages are mapped.
		 */
		rcu_read_lock();

		xas_for_each(xas, folio, ULONG_MAX) {
			stop = true;
			if (xas_retry(xas, folio))
				continue;
			if (xa_is_value(folio))
				break;
			if (folio->index != index) {
				xas_reset(xas);
				break;
			}

			if (!folio_try_get_rcu(folio)) {
				xas_reset(xas);
				continue;
			}

			/* Has the folio moved or been split? */
			if (unlikely(folio != xas_reload(xas))) {
				folio_put(folio);
				xas_reset(xas);
				break;
			}

			if (!folio_trylock(folio)) {
				folio_put(folio);
				xas_reset(xas);
				break;
			}
			if (!folio_test_dirty(folio) ||
			    folio_test_writeback(folio) ||
			    folio_test_fscache(folio)) {
				folio_unlock(folio);
				folio_put(folio);
				xas_reset(xas);
				break;
			}

			stop = false;
			len = folio_size(folio);
			priv = folio_get_private(folio);
			if ((const struct netfs_group *)priv != group) {
				stop = true;
				finfo = netfs_folio_info(folio);
				if (finfo->netfs_group != group ||
				    finfo->dirty_offset > 0) {
					folio_unlock(folio);
					folio_put(folio);
					xas_reset(xas);
					break;
				}
				len = finfo->dirty_len;
			}

			*_top += folio_size(folio);
			index += folio_nr_pages(folio);
			*_count -= folio_nr_pages(folio);
			*_len += len;
			if (*_len >= max_len || *_count <= 0)
				stop = true;

			if (!folio_batch_add(&fbatch, folio))
				break;
			if (stop)
				break;
		}

		xas_pause(xas);
		rcu_read_unlock();

		/* Now, if we obtained any folios, we can shift them to being
		 * writable and mark them for caching.
		 */
		if (!folio_batch_count(&fbatch))
			break;

		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			folio = fbatch.folios[i];
			trace_netfs_folio(folio, netfs_folio_trace_store_plus);

			if (!folio_clear_dirty_for_io(folio))
				BUG();
			folio_start_writeback(folio);
			netfs_folio_start_fscache(caching, folio);
			folio_unlock(folio);
		}

		folio_batch_release(&fbatch);
		cond_resched();
	} while (!stop);
}

/*
 * Synchronously write back the locked page and any subsequent non-locked dirty
 * pages.
 */
static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,
						  struct writeback_control *wbc,
						  struct netfs_group *group,
						  struct xa_state *xas,
						  struct folio *folio,
						  unsigned long long start,
						  unsigned long long end)
{
	struct netfs_io_request *wreq;
	struct netfs_folio *finfo;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	unsigned long long i_size = i_size_read(&ctx->inode);
	size_t len, max_len;
	bool caching = netfs_is_cache_enabled(ctx);
	long count = wbc->nr_to_write;
	int ret;

	_enter(",%lx,%llx-%llx,%u", folio->index, start, end, caching);

	wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio),
				   NETFS_WRITEBACK);
	if (IS_ERR(wreq)) {
		folio_unlock(folio);
		return PTR_ERR(wreq);
	}

	if (!folio_clear_dirty_for_io(folio))
		BUG();
	folio_start_writeback(folio);
	netfs_folio_start_fscache(caching, folio);

	count -= folio_nr_pages(folio);

	/* Find all consecutive lockable dirty pages that have contiguous
	 * written regions, stopping when we find a page that is not
	 * immediately lockable, is not dirty or is missing, or we reach the
	 * end of the range.
	 */
	trace_netfs_folio(folio, netfs_folio_trace_store);

	len = wreq->len;
	finfo = netfs_folio_info(folio);
	if (finfo) {
		start += finfo->dirty_offset;
		if (finfo->dirty_offset + finfo->dirty_len != len) {
			len = finfo->dirty_len;
			goto cant_expand;
		}
		len = finfo->dirty_len;
	}

	if (start < i_size) {
		/* Trim the write to the EOF; the extra data is ignored.  Also
		 * put an upper limit on the size of a single storedata op.
		 */
		max_len = 65536 * 4096;
		max_len = min_t(unsigned long long, max_len, end - start + 1);
		max_len = min_t(unsigned long long, max_len, i_size - start);

		if (len < max_len)
			netfs_extend_writeback(mapping, group, xas, &count, start,
					       max_len, caching, &len, &wreq->upper_len);
	}

cant_expand:
	len = min_t(unsigned long long, len, i_size - start);

	/* We now have a contiguous set of dirty pages, each with writeback
	 * set; the first page is still locked at this point, but all the rest
	 * have been unlocked.
	 */
	folio_unlock(folio);
	wreq->start = start;
	wreq->len = len;

	if (start < i_size) {
		_debug("write back %zx @%llx [%llx]", len, start, i_size);

		/* Speculatively write to the cache.  We have to fix this up
		 * later if the store fails.
		 */
		wreq->cleanup = netfs_cleanup_buffered_write;

		iov_iter_xarray(&wreq->iter, ITER_SOURCE, &mapping->i_pages, start,
				wreq->upper_len);
		__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
		ret = netfs_begin_write(wreq, true, netfs_write_trace_writeback);
		if (ret == 0 || ret == -EIOCBQUEUED)
			wbc->nr_to_write -= len / PAGE_SIZE;
	} else {
		_debug("write discard %zx @%llx [%llx]", len, start, i_size);

		/* The dirty region was entirely beyond the EOF. */
		fscache_clear_page_bits(mapping, start, len, caching);
		netfs_pages_written_back(wreq);
		ret = 0;
	}

	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
	_leave(" = 1");
	return 1;
}

/*
 * Write a region of pages back to the server
 */
static ssize_t netfs_writepages_begin(struct address_space *mapping,
				      struct writeback_control *wbc,
				      struct netfs_group *group,
				      struct xa_state *xas,
				      unsigned long long *_start,
				      unsigned long long end)
{
	const struct netfs_folio *finfo;
	struct folio *folio;
	unsigned long long start = *_start;
	ssize_t ret;
	void *priv;
	int skips = 0;

	_enter("%llx,%llx,", start, end);

search_again:
	/* Find the first dirty page in the group. */
	rcu_read_lock();

	for (;;) {
		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
		if (xas_retry(xas, folio) || xa_is_value(folio))
			continue;
		if (!folio)
			break;

		if (!folio_try_get_rcu(folio)) {
			xas_reset(xas);
			continue;
		}

		if (unlikely(folio != xas_reload(xas))) {
			folio_put(folio);
			xas_reset(xas);
			continue;
		}

		/* Skip any dirty folio that's not in the group of interest. */
		priv = folio_get_private(folio);
		if ((const struct netfs_group *)priv != group) {
			finfo = netfs_folio_info(folio);
			if (finfo->netfs_group != group) {
				folio_put(folio);
				continue;
			}
		}

		xas_pause(xas);
		break;
	}
	rcu_read_unlock();
	if (!folio)
		return 0;

	start = folio_pos(folio); /* May regress with THPs */

	_debug("wback %lx", folio->index);

	/* At this point we hold neither the i_pages lock nor the page lock:
	 * the page may be truncated or invalidated (changing page->mapping to
	 * NULL), or even swizzled back from swapper_space to tmpfs file
	 * mapping
	 */
lock_again:
	if (wbc->sync_mode != WB_SYNC_NONE) {
		ret = folio_lock_killable(folio);
		if (ret < 0)
			return ret;
	} else {
		if (!folio_trylock(folio))
			goto search_again;
	}

	if (folio->mapping != mapping ||
	    !folio_test_dirty(folio)) {
		start += folio_size(folio);
		folio_unlock(folio);
		goto search_again;
	}

	if (folio_test_writeback(folio) ||
	    folio_test_fscache(folio)) {
		folio_unlock(folio);
		if (wbc->sync_mode != WB_SYNC_NONE) {
			folio_wait_writeback(folio);
#ifdef CONFIG_FSCACHE
			folio_wait_fscache(folio);
#endif
			goto lock_again;
		}

		start += folio_size(folio);
		if (wbc->sync_mode == WB_SYNC_NONE) {
			if (skips >= 5 || need_resched()) {
				ret = 0;
				goto out;
			}
			skips++;
		}
		goto search_again;
	}

	ret = netfs_write_back_from_locked_folio(mapping, wbc, group, xas,
						 folio, start, end);
out:
	if (ret > 0)
		*_start = start + ret;
	_leave(" = %zd [%llx]", ret, *_start);
	return ret;
}

/*
 * Write a region of pages back to the server
 */
static int netfs_writepages_region(struct address_space *mapping,
				   struct writeback_control *wbc,
				   struct netfs_group *group,
				   unsigned long long *_start,
				   unsigned long long end)
{
	ssize_t ret;

	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);

	do {
		ret = netfs_writepages_begin(mapping, wbc, group, &xas,
					     _start, end);
		if (ret > 0 && wbc->nr_to_write > 0)
			cond_resched();
	} while (ret > 0 && wbc->nr_to_write > 0);

	return ret > 0 ? 0 : ret;
}

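/* Illustrative note (not part of this file): netfs_writepages() below is
 * exported for use as, or to be called from, a filesystem's ->writepages()
 * method, e.g. (structure name hypothetical):
 *
 *	static const struct address_space_operations example_aops = {
 *		.writepages	= netfs_writepages,
 *		...
 *	};
 */
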
1148 */ 1149 1150 if (wbc->range_cyclic && mapping->writeback_index) { 1151 start = mapping->writeback_index * PAGE_SIZE; 1152 ret = netfs_writepages_region(mapping, wbc, group, 1153 &start, LLONG_MAX); 1154 if (ret < 0) 1155 goto out; 1156 1157 if (wbc->nr_to_write <= 0) { 1158 mapping->writeback_index = start / PAGE_SIZE; 1159 goto out; 1160 } 1161 1162 start = 0; 1163 end = mapping->writeback_index * PAGE_SIZE; 1164 mapping->writeback_index = 0; 1165 ret = netfs_writepages_region(mapping, wbc, group, &start, end); 1166 if (ret == 0) 1167 mapping->writeback_index = start / PAGE_SIZE; 1168 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 1169 start = 0; 1170 ret = netfs_writepages_region(mapping, wbc, group, 1171 &start, LLONG_MAX); 1172 if (wbc->nr_to_write > 0 && ret == 0) 1173 mapping->writeback_index = start / PAGE_SIZE; 1174 } else { 1175 start = wbc->range_start; 1176 ret = netfs_writepages_region(mapping, wbc, group, 1177 &start, wbc->range_end); 1178 } 1179 1180 out: 1181 _leave(" = %d", ret); 1182 return ret; 1183 } 1184 EXPORT_SYMBOL(netfs_writepages); 1185 1186 /* 1187 * Deal with the disposition of a laundered folio. 1188 */ 1189 static void netfs_cleanup_launder_folio(struct netfs_io_request *wreq) 1190 { 1191 if (wreq->error) { 1192 pr_notice("R=%08x Laundering error %d\n", wreq->debug_id, wreq->error); 1193 mapping_set_error(wreq->mapping, wreq->error); 1194 } 1195 } 1196 1197 /** 1198 * netfs_launder_folio - Clean up a dirty folio that's being invalidated 1199 * @folio: The folio to clean 1200 * 1201 * This is called to write back a folio that's being invalidated when an inode 1202 * is getting torn down. Ideally, writepages would be used instead. 1203 */ 1204 int netfs_launder_folio(struct folio *folio) 1205 { 1206 struct netfs_io_request *wreq; 1207 struct address_space *mapping = folio->mapping; 1208 struct netfs_folio *finfo = netfs_folio_info(folio); 1209 struct netfs_group *group = netfs_folio_group(folio); 1210 struct bio_vec bvec; 1211 unsigned long long i_size = i_size_read(mapping->host); 1212 unsigned long long start = folio_pos(folio); 1213 size_t offset = 0, len; 1214 int ret = 0; 1215 1216 if (finfo) { 1217 offset = finfo->dirty_offset; 1218 start += offset; 1219 len = finfo->dirty_len; 1220 } else { 1221 len = folio_size(folio); 1222 } 1223 len = min_t(unsigned long long, len, i_size - start); 1224 1225 wreq = netfs_alloc_request(mapping, NULL, start, len, NETFS_LAUNDER_WRITE); 1226 if (IS_ERR(wreq)) { 1227 ret = PTR_ERR(wreq); 1228 goto out; 1229 } 1230 1231 if (!folio_clear_dirty_for_io(folio)) 1232 goto out_put; 1233 1234 trace_netfs_folio(folio, netfs_folio_trace_launder); 1235 1236 _debug("launder %llx-%llx", start, start + len - 1); 1237 1238 /* Speculatively write to the cache. We have to fix this up later if 1239 * the store fails. 1240 */ 1241 wreq->cleanup = netfs_cleanup_launder_folio; 1242 1243 bvec_set_folio(&bvec, folio, len, offset); 1244 iov_iter_bvec(&wreq->iter, ITER_SOURCE, &bvec, 1, len); 1245 __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 1246 ret = netfs_begin_write(wreq, true, netfs_write_trace_launder); 1247 1248 out_put: 1249 folio_detach_private(folio); 1250 netfs_put_group(group); 1251 kfree(finfo); 1252 netfs_put_request(wreq, false, netfs_rreq_trace_put_return); 1253 out: 1254 folio_wait_fscache(folio); 1255 _leave(" = %d", ret); 1256 return ret; 1257 } 1258 EXPORT_SYMBOL(netfs_launder_folio); 1259