1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Network filesystem high-level (buffered) writeback. 3 * 4 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 * 7 * 8 * To support network filesystems with local caching, we manage a situation 9 * that can be envisioned like the following: 10 * 11 * +---+---+-----+-----+---+----------+ 12 * Folios: | | | | | | | 13 * +---+---+-----+-----+---+----------+ 14 * 15 * +------+------+ +----+----+ 16 * Upload: | | |.....| | | 17 * (Stream 0) +------+------+ +----+----+ 18 * 19 * +------+------+------+------+------+ 20 * Cache: | | | | | | 21 * (Stream 1) +------+------+------+------+------+ 22 * 23 * Where we have a sequence of folios of varying sizes that we need to overlay 24 * with multiple parallel streams of I/O requests, where the I/O requests in a 25 * stream may also be of various sizes (in cifs, for example, the sizes are 26 * negotiated with the server; in something like ceph, they may represent the 27 * sizes of storage objects). 28 * 29 * The sequence in each stream may contain gaps and noncontiguous subrequests 30 * may be glued together into single vectored write RPCs. 31 */ 32 33 #include <linux/export.h> 34 #include <linux/fs.h> 35 #include <linux/mm.h> 36 #include <linux/pagemap.h> 37 #include "internal.h" 38 39 /* 40 * Kill all dirty folios in the event of an unrecoverable error, starting with 41 * a locked folio we've already obtained from writeback_iter(). 42 */ 43 static void netfs_kill_dirty_pages(struct address_space *mapping, 44 struct writeback_control *wbc, 45 struct folio *folio) 46 { 47 int error = 0; 48 49 do { 50 enum netfs_folio_trace why = netfs_folio_trace_kill; 51 struct netfs_group *group = NULL; 52 struct netfs_folio *finfo = NULL; 53 void *priv; 54 55 priv = folio_detach_private(folio); 56 if (priv) { 57 finfo = __netfs_folio_info(priv); 58 if (finfo) { 59 /* Kill folio from streaming write. */ 60 group = finfo->netfs_group; 61 why = netfs_folio_trace_kill_s; 62 } else { 63 group = priv; 64 if (group == NETFS_FOLIO_COPY_TO_CACHE) { 65 /* Kill copy-to-cache folio */ 66 why = netfs_folio_trace_kill_cc; 67 group = NULL; 68 } else { 69 /* Kill folio with group */ 70 why = netfs_folio_trace_kill_g; 71 } 72 } 73 } 74 75 trace_netfs_folio(folio, why); 76 77 folio_start_writeback(folio); 78 folio_unlock(folio); 79 folio_end_writeback(folio); 80 81 netfs_put_group(group); 82 kfree(finfo); 83 84 } while ((folio = writeback_iter(mapping, wbc, folio, &error))); 85 } 86 87 /* 88 * Create a write request and set it up appropriately for the origin type. 89 */ 90 struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, 91 struct file *file, 92 loff_t start, 93 enum netfs_io_origin origin) 94 { 95 struct netfs_io_request *wreq; 96 struct netfs_inode *ictx; 97 bool is_cacheable = (origin == NETFS_WRITEBACK || 98 origin == NETFS_WRITEBACK_SINGLE || 99 origin == NETFS_WRITETHROUGH || 100 origin == NETFS_PGPRIV2_COPY_TO_CACHE); 101 102 wreq = netfs_alloc_request(mapping, file, start, 0, origin); 103 if (IS_ERR(wreq)) 104 return wreq; 105 106 _enter("R=%x", wreq->debug_id); 107 108 ictx = netfs_inode(wreq->inode); 109 if (is_cacheable && netfs_is_cache_enabled(ictx)) 110 fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx)); 111 if (rolling_buffer_init(&wreq->buffer, wreq->debug_id, ITER_SOURCE) < 0) 112 goto nomem; 113 114 wreq->cleaned_to = wreq->start; 115 116 wreq->io_streams[0].stream_nr = 0; 117 wreq->io_streams[0].source = NETFS_UPLOAD_TO_SERVER; 118 wreq->io_streams[0].prepare_write = ictx->ops->prepare_write; 119 wreq->io_streams[0].issue_write = ictx->ops->issue_write; 120 wreq->io_streams[0].collected_to = start; 121 wreq->io_streams[0].transferred = 0; 122 123 wreq->io_streams[1].stream_nr = 1; 124 wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE; 125 wreq->io_streams[1].collected_to = start; 126 wreq->io_streams[1].transferred = 0; 127 if (fscache_resources_valid(&wreq->cache_resources)) { 128 wreq->io_streams[1].avail = true; 129 wreq->io_streams[1].active = true; 130 wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq; 131 wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write; 132 } 133 134 return wreq; 135 nomem: 136 netfs_put_failed_request(wreq); 137 return ERR_PTR(-ENOMEM); 138 } 139 140 /** 141 * netfs_prepare_write_failed - Note write preparation failed 142 * @subreq: The subrequest to mark 143 * 144 * Mark a subrequest to note that preparation for write failed. 145 */ 146 void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq) 147 { 148 __set_bit(NETFS_SREQ_FAILED, &subreq->flags); 149 trace_netfs_sreq(subreq, netfs_sreq_trace_prep_failed); 150 } 151 EXPORT_SYMBOL(netfs_prepare_write_failed); 152 153 /* 154 * Prepare a write subrequest. We need to allocate a new subrequest 155 * if we don't have one. 156 */ 157 static void netfs_prepare_write(struct netfs_io_request *wreq, 158 struct netfs_io_stream *stream, 159 loff_t start) 160 { 161 struct netfs_io_subrequest *subreq; 162 struct iov_iter *wreq_iter = &wreq->buffer.iter; 163 164 /* Make sure we don't point the iterator at a used-up folio_queue 165 * struct being used as a placeholder to prevent the queue from 166 * collapsing. In such a case, extend the queue. 167 */ 168 if (iov_iter_is_folioq(wreq_iter) && 169 wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq)) 170 rolling_buffer_make_space(&wreq->buffer); 171 172 subreq = netfs_alloc_subrequest(wreq); 173 subreq->source = stream->source; 174 subreq->start = start; 175 subreq->stream_nr = stream->stream_nr; 176 subreq->io_iter = *wreq_iter; 177 178 _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index); 179 180 trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); 181 182 stream->sreq_max_len = UINT_MAX; 183 stream->sreq_max_segs = INT_MAX; 184 switch (stream->source) { 185 case NETFS_UPLOAD_TO_SERVER: 186 netfs_stat(&netfs_n_wh_upload); 187 stream->sreq_max_len = wreq->wsize; 188 break; 189 case NETFS_WRITE_TO_CACHE: 190 netfs_stat(&netfs_n_wh_write); 191 break; 192 default: 193 WARN_ON_ONCE(1); 194 break; 195 } 196 197 if (stream->prepare_write) 198 stream->prepare_write(subreq); 199 200 __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); 201 202 /* We add to the end of the list whilst the collector may be walking 203 * the list. The collector only goes nextwards and uses the lock to 204 * remove entries off of the front. 205 */ 206 spin_lock(&wreq->lock); 207 list_add_tail(&subreq->rreq_link, &stream->subrequests); 208 if (list_is_first(&subreq->rreq_link, &stream->subrequests)) { 209 stream->front = subreq; 210 if (!stream->active) { 211 stream->collected_to = stream->front->start; 212 /* Write list pointers before active flag */ 213 smp_store_release(&stream->active, true); 214 } 215 } 216 217 spin_unlock(&wreq->lock); 218 219 stream->construct = subreq; 220 } 221 222 /* 223 * Set the I/O iterator for the filesystem/cache to use and dispatch the I/O 224 * operation. The operation may be asynchronous and should call 225 * netfs_write_subrequest_terminated() when complete. 226 */ 227 static void netfs_do_issue_write(struct netfs_io_stream *stream, 228 struct netfs_io_subrequest *subreq) 229 { 230 struct netfs_io_request *wreq = subreq->rreq; 231 232 _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); 233 234 if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) 235 return netfs_write_subrequest_terminated(subreq, subreq->error); 236 237 trace_netfs_sreq(subreq, netfs_sreq_trace_submit); 238 stream->issue_write(subreq); 239 } 240 241 void netfs_reissue_write(struct netfs_io_stream *stream, 242 struct netfs_io_subrequest *subreq, 243 struct iov_iter *source) 244 { 245 size_t size = subreq->len - subreq->transferred; 246 247 // TODO: Use encrypted buffer 248 subreq->io_iter = *source; 249 iov_iter_advance(source, size); 250 iov_iter_truncate(&subreq->io_iter, size); 251 252 subreq->retry_count++; 253 __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); 254 __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); 255 netfs_stat(&netfs_n_wh_retry_write_subreq); 256 netfs_do_issue_write(stream, subreq); 257 } 258 259 void netfs_issue_write(struct netfs_io_request *wreq, 260 struct netfs_io_stream *stream) 261 { 262 struct netfs_io_subrequest *subreq = stream->construct; 263 264 if (!subreq) 265 return; 266 stream->construct = NULL; 267 subreq->io_iter.count = subreq->len; 268 netfs_do_issue_write(stream, subreq); 269 } 270 271 /* 272 * Add data to the write subrequest, dispatching each as we fill it up or if it 273 * is discontiguous with the previous. We only fill one part at a time so that 274 * we can avoid overrunning the credits obtained (cifs) and try to parallelise 275 * content-crypto preparation with network writes. 276 */ 277 size_t netfs_advance_write(struct netfs_io_request *wreq, 278 struct netfs_io_stream *stream, 279 loff_t start, size_t len, bool to_eof) 280 { 281 struct netfs_io_subrequest *subreq = stream->construct; 282 size_t part; 283 284 if (!stream->avail) { 285 _leave("no write"); 286 return len; 287 } 288 289 _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0); 290 291 if (subreq && start != subreq->start + subreq->len) { 292 netfs_issue_write(wreq, stream); 293 subreq = NULL; 294 } 295 296 if (!stream->construct) 297 netfs_prepare_write(wreq, stream, start); 298 subreq = stream->construct; 299 300 part = umin(stream->sreq_max_len - subreq->len, len); 301 _debug("part %zx/%zx %zx/%zx", subreq->len, stream->sreq_max_len, part, len); 302 subreq->len += part; 303 subreq->nr_segs++; 304 stream->submit_extendable_to -= part; 305 306 if (subreq->len >= stream->sreq_max_len || 307 subreq->nr_segs >= stream->sreq_max_segs || 308 to_eof) { 309 netfs_issue_write(wreq, stream); 310 subreq = NULL; 311 } 312 313 return part; 314 } 315 316 /* 317 * Write some of a pending folio data back to the server. 318 */ 319 static int netfs_write_folio(struct netfs_io_request *wreq, 320 struct writeback_control *wbc, 321 struct folio *folio) 322 { 323 struct netfs_io_stream *upload = &wreq->io_streams[0]; 324 struct netfs_io_stream *cache = &wreq->io_streams[1]; 325 struct netfs_io_stream *stream; 326 struct netfs_group *fgroup; /* TODO: Use this with ceph */ 327 struct netfs_folio *finfo; 328 size_t iter_off = 0; 329 size_t fsize = folio_size(folio), flen = fsize, foff = 0; 330 loff_t fpos = folio_pos(folio), i_size; 331 bool to_eof = false, streamw = false; 332 bool debug = false; 333 334 _enter(""); 335 336 if (rolling_buffer_make_space(&wreq->buffer) < 0) 337 return -ENOMEM; 338 339 /* netfs_perform_write() may shift i_size around the page or from out 340 * of the page to beyond it, but cannot move i_size into or through the 341 * page since we have it locked. 342 */ 343 i_size = i_size_read(wreq->inode); 344 345 if (fpos >= i_size) { 346 /* mmap beyond eof. */ 347 _debug("beyond eof"); 348 folio_start_writeback(folio); 349 folio_unlock(folio); 350 wreq->nr_group_rel += netfs_folio_written_back(folio); 351 netfs_put_group_many(wreq->group, wreq->nr_group_rel); 352 wreq->nr_group_rel = 0; 353 return 0; 354 } 355 356 if (fpos + fsize > wreq->i_size) 357 wreq->i_size = i_size; 358 359 fgroup = netfs_folio_group(folio); 360 finfo = netfs_folio_info(folio); 361 if (finfo) { 362 foff = finfo->dirty_offset; 363 flen = foff + finfo->dirty_len; 364 streamw = true; 365 } 366 367 if (wreq->origin == NETFS_WRITETHROUGH) { 368 to_eof = false; 369 if (flen > i_size - fpos) 370 flen = i_size - fpos; 371 } else if (flen > i_size - fpos) { 372 flen = i_size - fpos; 373 if (!streamw) 374 folio_zero_segment(folio, flen, fsize); 375 to_eof = true; 376 } else if (flen == i_size - fpos) { 377 to_eof = true; 378 } 379 flen -= foff; 380 381 _debug("folio %zx %zx %zx", foff, flen, fsize); 382 383 /* Deal with discontinuities in the stream of dirty pages. These can 384 * arise from a number of sources: 385 * 386 * (1) Intervening non-dirty pages from random-access writes, multiple 387 * flushers writing back different parts simultaneously and manual 388 * syncing. 389 * 390 * (2) Partially-written pages from write-streaming. 391 * 392 * (3) Pages that belong to a different write-back group (eg. Ceph 393 * snapshots). 394 * 395 * (4) Actually-clean pages that were marked for write to the cache 396 * when they were read. Note that these appear as a special 397 * write-back group. 398 */ 399 if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) { 400 netfs_issue_write(wreq, upload); 401 } else if (fgroup != wreq->group) { 402 /* We can't write this page to the server yet. */ 403 kdebug("wrong group"); 404 folio_redirty_for_writepage(wbc, folio); 405 folio_unlock(folio); 406 netfs_issue_write(wreq, upload); 407 netfs_issue_write(wreq, cache); 408 return 0; 409 } 410 411 if (foff > 0) 412 netfs_issue_write(wreq, upload); 413 if (streamw) 414 netfs_issue_write(wreq, cache); 415 416 /* Flip the page to the writeback state and unlock. If we're called 417 * from write-through, then the page has already been put into the wb 418 * state. 419 */ 420 if (wreq->origin == NETFS_WRITEBACK) 421 folio_start_writeback(folio); 422 folio_unlock(folio); 423 424 if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) { 425 if (!cache->avail) { 426 trace_netfs_folio(folio, netfs_folio_trace_cancel_copy); 427 netfs_issue_write(wreq, upload); 428 netfs_folio_written_back(folio); 429 return 0; 430 } 431 trace_netfs_folio(folio, netfs_folio_trace_store_copy); 432 } else if (!upload->avail && !cache->avail) { 433 trace_netfs_folio(folio, netfs_folio_trace_cancel_store); 434 netfs_folio_written_back(folio); 435 return 0; 436 } else if (!upload->construct) { 437 trace_netfs_folio(folio, netfs_folio_trace_store); 438 } else { 439 trace_netfs_folio(folio, netfs_folio_trace_store_plus); 440 } 441 442 /* Attach the folio to the rolling buffer. */ 443 rolling_buffer_append(&wreq->buffer, folio, 0); 444 445 /* Move the submission point forward to allow for write-streaming data 446 * not starting at the front of the page. We don't do write-streaming 447 * with the cache as the cache requires DIO alignment. 448 * 449 * Also skip uploading for data that's been read and just needs copying 450 * to the cache. 451 */ 452 for (int s = 0; s < NR_IO_STREAMS; s++) { 453 stream = &wreq->io_streams[s]; 454 stream->submit_off = foff; 455 stream->submit_len = flen; 456 if (!stream->avail || 457 (stream->source == NETFS_WRITE_TO_CACHE && streamw) || 458 (stream->source == NETFS_UPLOAD_TO_SERVER && 459 fgroup == NETFS_FOLIO_COPY_TO_CACHE)) { 460 stream->submit_off = UINT_MAX; 461 stream->submit_len = 0; 462 } 463 } 464 465 /* Attach the folio to one or more subrequests. For a big folio, we 466 * could end up with thousands of subrequests if the wsize is small - 467 * but we might need to wait during the creation of subrequests for 468 * network resources (eg. SMB credits). 469 */ 470 for (;;) { 471 ssize_t part; 472 size_t lowest_off = ULONG_MAX; 473 int choose_s = -1; 474 475 /* Always add to the lowest-submitted stream first. */ 476 for (int s = 0; s < NR_IO_STREAMS; s++) { 477 stream = &wreq->io_streams[s]; 478 if (stream->submit_len > 0 && 479 stream->submit_off < lowest_off) { 480 lowest_off = stream->submit_off; 481 choose_s = s; 482 } 483 } 484 485 if (choose_s < 0) 486 break; 487 stream = &wreq->io_streams[choose_s]; 488 489 /* Advance the iterator(s). */ 490 if (stream->submit_off > iter_off) { 491 rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off); 492 iter_off = stream->submit_off; 493 } 494 495 atomic64_set(&wreq->issued_to, fpos + stream->submit_off); 496 stream->submit_extendable_to = fsize - stream->submit_off; 497 part = netfs_advance_write(wreq, stream, fpos + stream->submit_off, 498 stream->submit_len, to_eof); 499 stream->submit_off += part; 500 if (part > stream->submit_len) 501 stream->submit_len = 0; 502 else 503 stream->submit_len -= part; 504 if (part > 0) 505 debug = true; 506 } 507 508 if (fsize > iter_off) 509 rolling_buffer_advance(&wreq->buffer, fsize - iter_off); 510 atomic64_set(&wreq->issued_to, fpos + fsize); 511 512 if (!debug) 513 kdebug("R=%x: No submit", wreq->debug_id); 514 515 if (foff + flen < fsize) 516 for (int s = 0; s < NR_IO_STREAMS; s++) 517 netfs_issue_write(wreq, &wreq->io_streams[s]); 518 519 _leave(" = 0"); 520 return 0; 521 } 522 523 /* 524 * End the issuing of writes, letting the collector know we're done. 525 */ 526 static void netfs_end_issue_write(struct netfs_io_request *wreq) 527 { 528 bool needs_poke = true; 529 530 smp_wmb(); /* Write subreq lists before ALL_QUEUED. */ 531 set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); 532 533 for (int s = 0; s < NR_IO_STREAMS; s++) { 534 struct netfs_io_stream *stream = &wreq->io_streams[s]; 535 536 if (!stream->active) 537 continue; 538 if (!list_empty(&stream->subrequests)) 539 needs_poke = false; 540 netfs_issue_write(wreq, stream); 541 } 542 543 if (needs_poke) 544 netfs_wake_collector(wreq); 545 } 546 547 /* 548 * Write some of the pending data back to the server 549 */ 550 int netfs_writepages(struct address_space *mapping, 551 struct writeback_control *wbc) 552 { 553 struct netfs_inode *ictx = netfs_inode(mapping->host); 554 struct netfs_io_request *wreq = NULL; 555 struct folio *folio; 556 int error = 0; 557 558 if (!mutex_trylock(&ictx->wb_lock)) { 559 if (wbc->sync_mode == WB_SYNC_NONE) { 560 netfs_stat(&netfs_n_wb_lock_skip); 561 return 0; 562 } 563 netfs_stat(&netfs_n_wb_lock_wait); 564 mutex_lock(&ictx->wb_lock); 565 } 566 567 /* Need the first folio to be able to set up the op. */ 568 folio = writeback_iter(mapping, wbc, NULL, &error); 569 if (!folio) 570 goto out; 571 572 wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK); 573 if (IS_ERR(wreq)) { 574 error = PTR_ERR(wreq); 575 goto couldnt_start; 576 } 577 578 __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); 579 trace_netfs_write(wreq, netfs_write_trace_writeback); 580 netfs_stat(&netfs_n_wh_writepages); 581 582 do { 583 _debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to)); 584 585 /* It appears we don't have to handle cyclic writeback wrapping. */ 586 WARN_ON_ONCE(wreq && folio_pos(folio) < atomic64_read(&wreq->issued_to)); 587 588 if (netfs_folio_group(folio) != NETFS_FOLIO_COPY_TO_CACHE && 589 unlikely(!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))) { 590 set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags); 591 wreq->netfs_ops->begin_writeback(wreq); 592 } 593 594 error = netfs_write_folio(wreq, wbc, folio); 595 if (error < 0) 596 break; 597 } while ((folio = writeback_iter(mapping, wbc, folio, &error))); 598 599 netfs_end_issue_write(wreq); 600 601 mutex_unlock(&ictx->wb_lock); 602 netfs_wake_collector(wreq); 603 604 netfs_put_request(wreq, netfs_rreq_trace_put_return); 605 _leave(" = %d", error); 606 return error; 607 608 couldnt_start: 609 netfs_kill_dirty_pages(mapping, wbc, folio); 610 out: 611 mutex_unlock(&ictx->wb_lock); 612 _leave(" = %d", error); 613 return error; 614 } 615 EXPORT_SYMBOL(netfs_writepages); 616 617 /* 618 * Begin a write operation for writing through the pagecache. 619 */ 620 struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len) 621 { 622 struct netfs_io_request *wreq = NULL; 623 struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp)); 624 625 mutex_lock(&ictx->wb_lock); 626 627 wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, 628 iocb->ki_pos, NETFS_WRITETHROUGH); 629 if (IS_ERR(wreq)) { 630 mutex_unlock(&ictx->wb_lock); 631 return wreq; 632 } 633 634 wreq->io_streams[0].avail = true; 635 trace_netfs_write(wreq, netfs_write_trace_writethrough); 636 return wreq; 637 } 638 639 /* 640 * Advance the state of the write operation used when writing through the 641 * pagecache. Data has been copied into the pagecache that we need to append 642 * to the request. If we've added more than wsize then we need to create a new 643 * subrequest. 644 */ 645 int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, 646 struct folio *folio, size_t copied, bool to_page_end, 647 struct folio **writethrough_cache) 648 { 649 _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u", 650 wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end); 651 652 if (!*writethrough_cache) { 653 if (folio_test_dirty(folio)) 654 /* Sigh. mmap. */ 655 folio_clear_dirty_for_io(folio); 656 657 /* We can make multiple writes to the folio... */ 658 folio_start_writeback(folio); 659 if (wreq->len == 0) 660 trace_netfs_folio(folio, netfs_folio_trace_wthru); 661 else 662 trace_netfs_folio(folio, netfs_folio_trace_wthru_plus); 663 *writethrough_cache = folio; 664 } 665 666 wreq->len += copied; 667 if (!to_page_end) 668 return 0; 669 670 *writethrough_cache = NULL; 671 return netfs_write_folio(wreq, wbc, folio); 672 } 673 674 /* 675 * End a write operation used when writing through the pagecache. 676 */ 677 ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, 678 struct folio *writethrough_cache) 679 { 680 struct netfs_inode *ictx = netfs_inode(wreq->inode); 681 ssize_t ret; 682 683 _enter("R=%x", wreq->debug_id); 684 685 if (writethrough_cache) 686 netfs_write_folio(wreq, wbc, writethrough_cache); 687 688 netfs_end_issue_write(wreq); 689 690 mutex_unlock(&ictx->wb_lock); 691 692 if (wreq->iocb) 693 ret = -EIOCBQUEUED; 694 else 695 ret = netfs_wait_for_write(wreq); 696 netfs_put_request(wreq, netfs_rreq_trace_put_return); 697 return ret; 698 } 699 700 /* 701 * Write data to the server without going through the pagecache and without 702 * writing it to the local cache. 703 */ 704 int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len) 705 { 706 struct netfs_io_stream *upload = &wreq->io_streams[0]; 707 ssize_t part; 708 loff_t start = wreq->start; 709 int error = 0; 710 711 _enter("%zx", len); 712 713 if (wreq->origin == NETFS_DIO_WRITE) 714 inode_dio_begin(wreq->inode); 715 716 while (len) { 717 // TODO: Prepare content encryption 718 719 _debug("unbuffered %zx", len); 720 part = netfs_advance_write(wreq, upload, start, len, false); 721 start += part; 722 len -= part; 723 rolling_buffer_advance(&wreq->buffer, part); 724 if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) 725 netfs_wait_for_paused_write(wreq); 726 if (test_bit(NETFS_RREQ_FAILED, &wreq->flags)) 727 break; 728 } 729 730 netfs_end_issue_write(wreq); 731 _leave(" = %d", error); 732 return error; 733 } 734 735 /* 736 * Write some of a pending folio data back to the server and/or the cache. 737 */ 738 static int netfs_write_folio_single(struct netfs_io_request *wreq, 739 struct folio *folio) 740 { 741 struct netfs_io_stream *upload = &wreq->io_streams[0]; 742 struct netfs_io_stream *cache = &wreq->io_streams[1]; 743 struct netfs_io_stream *stream; 744 size_t iter_off = 0; 745 size_t fsize = folio_size(folio), flen; 746 loff_t fpos = folio_pos(folio); 747 bool to_eof = false; 748 bool no_debug = false; 749 750 _enter(""); 751 752 flen = folio_size(folio); 753 if (flen > wreq->i_size - fpos) { 754 flen = wreq->i_size - fpos; 755 folio_zero_segment(folio, flen, fsize); 756 to_eof = true; 757 } else if (flen == wreq->i_size - fpos) { 758 to_eof = true; 759 } 760 761 _debug("folio %zx/%zx", flen, fsize); 762 763 if (!upload->avail && !cache->avail) { 764 trace_netfs_folio(folio, netfs_folio_trace_cancel_store); 765 return 0; 766 } 767 768 if (!upload->construct) 769 trace_netfs_folio(folio, netfs_folio_trace_store); 770 else 771 trace_netfs_folio(folio, netfs_folio_trace_store_plus); 772 773 /* Attach the folio to the rolling buffer. */ 774 folio_get(folio); 775 rolling_buffer_append(&wreq->buffer, folio, NETFS_ROLLBUF_PUT_MARK); 776 777 /* Move the submission point forward to allow for write-streaming data 778 * not starting at the front of the page. We don't do write-streaming 779 * with the cache as the cache requires DIO alignment. 780 * 781 * Also skip uploading for data that's been read and just needs copying 782 * to the cache. 783 */ 784 for (int s = 0; s < NR_IO_STREAMS; s++) { 785 stream = &wreq->io_streams[s]; 786 stream->submit_off = 0; 787 stream->submit_len = flen; 788 if (!stream->avail) { 789 stream->submit_off = UINT_MAX; 790 stream->submit_len = 0; 791 } 792 } 793 794 /* Attach the folio to one or more subrequests. For a big folio, we 795 * could end up with thousands of subrequests if the wsize is small - 796 * but we might need to wait during the creation of subrequests for 797 * network resources (eg. SMB credits). 798 */ 799 for (;;) { 800 ssize_t part; 801 size_t lowest_off = ULONG_MAX; 802 int choose_s = -1; 803 804 /* Always add to the lowest-submitted stream first. */ 805 for (int s = 0; s < NR_IO_STREAMS; s++) { 806 stream = &wreq->io_streams[s]; 807 if (stream->submit_len > 0 && 808 stream->submit_off < lowest_off) { 809 lowest_off = stream->submit_off; 810 choose_s = s; 811 } 812 } 813 814 if (choose_s < 0) 815 break; 816 stream = &wreq->io_streams[choose_s]; 817 818 /* Advance the iterator(s). */ 819 if (stream->submit_off > iter_off) { 820 rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off); 821 iter_off = stream->submit_off; 822 } 823 824 atomic64_set(&wreq->issued_to, fpos + stream->submit_off); 825 stream->submit_extendable_to = fsize - stream->submit_off; 826 part = netfs_advance_write(wreq, stream, fpos + stream->submit_off, 827 stream->submit_len, to_eof); 828 stream->submit_off += part; 829 if (part > stream->submit_len) 830 stream->submit_len = 0; 831 else 832 stream->submit_len -= part; 833 if (part > 0) 834 no_debug = true; 835 } 836 837 wreq->buffer.iter.iov_offset = 0; 838 if (fsize > iter_off) 839 rolling_buffer_advance(&wreq->buffer, fsize - iter_off); 840 atomic64_set(&wreq->issued_to, fpos + fsize); 841 842 if (!no_debug) 843 kdebug("R=%x: No submit", wreq->debug_id); 844 _leave(" = 0"); 845 return 0; 846 } 847 848 /** 849 * netfs_writeback_single - Write back a monolithic payload 850 * @mapping: The mapping to write from 851 * @wbc: Hints from the VM 852 * @iter: Data to write, must be ITER_FOLIOQ. 853 * 854 * Write a monolithic, non-pagecache object back to the server and/or 855 * the cache. 856 */ 857 int netfs_writeback_single(struct address_space *mapping, 858 struct writeback_control *wbc, 859 struct iov_iter *iter) 860 { 861 struct netfs_io_request *wreq; 862 struct netfs_inode *ictx = netfs_inode(mapping->host); 863 struct folio_queue *fq; 864 size_t size = iov_iter_count(iter); 865 int ret; 866 867 if (WARN_ON_ONCE(!iov_iter_is_folioq(iter))) 868 return -EIO; 869 870 if (!mutex_trylock(&ictx->wb_lock)) { 871 if (wbc->sync_mode == WB_SYNC_NONE) { 872 netfs_stat(&netfs_n_wb_lock_skip); 873 return 0; 874 } 875 netfs_stat(&netfs_n_wb_lock_wait); 876 mutex_lock(&ictx->wb_lock); 877 } 878 879 wreq = netfs_create_write_req(mapping, NULL, 0, NETFS_WRITEBACK_SINGLE); 880 if (IS_ERR(wreq)) { 881 ret = PTR_ERR(wreq); 882 goto couldnt_start; 883 } 884 885 __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); 886 trace_netfs_write(wreq, netfs_write_trace_writeback_single); 887 netfs_stat(&netfs_n_wh_writepages); 888 889 if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags)) 890 wreq->netfs_ops->begin_writeback(wreq); 891 892 for (fq = (struct folio_queue *)iter->folioq; fq; fq = fq->next) { 893 for (int slot = 0; slot < folioq_count(fq); slot++) { 894 struct folio *folio = folioq_folio(fq, slot); 895 size_t part = umin(folioq_folio_size(fq, slot), size); 896 897 _debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to)); 898 899 ret = netfs_write_folio_single(wreq, folio); 900 if (ret < 0) 901 goto stop; 902 size -= part; 903 if (size <= 0) 904 goto stop; 905 } 906 } 907 908 stop: 909 for (int s = 0; s < NR_IO_STREAMS; s++) 910 netfs_issue_write(wreq, &wreq->io_streams[s]); 911 smp_wmb(); /* Write lists before ALL_QUEUED. */ 912 set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); 913 914 mutex_unlock(&ictx->wb_lock); 915 netfs_wake_collector(wreq); 916 917 netfs_put_request(wreq, netfs_rreq_trace_put_return); 918 _leave(" = %d", ret); 919 return ret; 920 921 couldnt_start: 922 mutex_unlock(&ictx->wb_lock); 923 _leave(" = %d", ret); 924 return ret; 925 } 926 EXPORT_SYMBOL(netfs_writeback_single); 927