// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level (buffered) writeback.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 *
 * To support network filesystems with local caching, we manage a situation
 * that can be envisioned as follows:
 *
 *                 +---+---+-----+-----+---+----------+
 *       Folios:   |   |   |     |     |   |          |
 *                 +---+---+-----+-----+---+----------+
 *
 *                 +------+------+     +----+----+
 *       Upload:   |      |      |.....|    |    |
 *    (Stream 0)   +------+------+     +----+----+
 *
 *                 +------+------+------+------+------+
 *        Cache:   |      |      |      |      |      |
 *    (Stream 1)   +------+------+------+------+------+
 *
 * Where we have a sequence of folios of varying sizes that we need to overlay
 * with multiple parallel streams of I/O requests, where the I/O requests in a
 * stream may also be of various sizes (in cifs, for example, the sizes are
 * negotiated with the server; in something like ceph, they may represent the
 * sizes of storage objects).
 *
 * The sequence in each stream may contain gaps, and noncontiguous subrequests
 * may be glued together into single vectored write RPCs.
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include "internal.h"

/*
 * Kill all dirty folios in the event of an unrecoverable error, starting with
 * a locked folio we've already obtained from writeback_iter().
 */
static void netfs_kill_dirty_pages(struct address_space *mapping,
				   struct writeback_control *wbc,
				   struct folio *folio)
{
	int error = 0;

	do {
		enum netfs_folio_trace why = netfs_folio_trace_kill;
		struct netfs_group *group = NULL;
		struct netfs_folio *finfo = NULL;
		void *priv;

		priv = folio_detach_private(folio);
		if (priv) {
			finfo = __netfs_folio_info(priv);
			if (finfo) {
				/* Kill folio from streaming write. */
				group = finfo->netfs_group;
				why = netfs_folio_trace_kill_s;
			} else {
				group = priv;
				if (group == NETFS_FOLIO_COPY_TO_CACHE) {
					/* Kill copy-to-cache folio */
					why = netfs_folio_trace_kill_cc;
					group = NULL;
				} else {
					/* Kill folio with group */
					why = netfs_folio_trace_kill_g;
				}
			}
		}

		trace_netfs_folio(folio, why);

		folio_start_writeback(folio);
		folio_unlock(folio);
		folio_end_writeback(folio);

		netfs_put_group(group);
		kfree(finfo);

	} while ((folio = writeback_iter(mapping, wbc, folio, &error)));
}
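
/*
 * Illustrative sketch (not taken from this file): the per-stream hooks that
 * netfs_create_write_req() wires up below are expected to come from the
 * filesystem's netfs_request_ops (stream 0) and, when caching is enabled,
 * from the cache's netfs_cache_ops (stream 1).  A filesystem would supply
 * something along the lines of:
 *
 *	static const struct netfs_request_ops myfs_req_ops = {
 *		.begin_writeback	= myfs_begin_writeback,
 *		.prepare_write		= myfs_prepare_write,
 *		.issue_write		= myfs_issue_write,
 *	};
 *
 * The myfs_* names are hypothetical placeholders.
 */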

/*
 * Create a write request and set it up appropriately for the origin type.
 */
struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
						struct file *file,
						loff_t start,
						enum netfs_io_origin origin)
{
	struct netfs_io_request *wreq;
	struct netfs_inode *ictx;
	bool is_cacheable = (origin == NETFS_WRITEBACK ||
			     origin == NETFS_WRITEBACK_SINGLE ||
			     origin == NETFS_WRITETHROUGH ||
			     origin == NETFS_PGPRIV2_COPY_TO_CACHE);

	wreq = netfs_alloc_request(mapping, file, start, 0, origin);
	if (IS_ERR(wreq))
		return wreq;

	_enter("R=%x", wreq->debug_id);

	ictx = netfs_inode(wreq->inode);
	if (is_cacheable && netfs_is_cache_enabled(ictx))
		fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
	if (rolling_buffer_init(&wreq->buffer, wreq->debug_id, ITER_SOURCE) < 0)
		goto nomem;

	wreq->cleaned_to = wreq->start;

	wreq->io_streams[0].stream_nr = 0;
	wreq->io_streams[0].source = NETFS_UPLOAD_TO_SERVER;
	wreq->io_streams[0].prepare_write = ictx->ops->prepare_write;
	wreq->io_streams[0].issue_write = ictx->ops->issue_write;
	wreq->io_streams[0].collected_to = start;
	wreq->io_streams[0].transferred = LONG_MAX;

	wreq->io_streams[1].stream_nr = 1;
	wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE;
	wreq->io_streams[1].collected_to = start;
	wreq->io_streams[1].transferred = LONG_MAX;
	if (fscache_resources_valid(&wreq->cache_resources)) {
		wreq->io_streams[1].avail = true;
		wreq->io_streams[1].active = true;
		wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq;
		wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write;
	}

	return wreq;
nomem:
	wreq->error = -ENOMEM;
	netfs_put_request(wreq, false, netfs_rreq_trace_put_failed);
	return ERR_PTR(-ENOMEM);
}

/**
 * netfs_prepare_write_failed - Note write preparation failed
 * @subreq: The subrequest to mark
 *
 * Mark a subrequest to note that preparation for write failed.
 */
void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq)
{
	__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
	trace_netfs_sreq(subreq, netfs_sreq_trace_prep_failed);
}
EXPORT_SYMBOL(netfs_prepare_write_failed);
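
/*
 * Illustrative sketch (an assumption, not part of the netfs core): a
 * filesystem's ->prepare_write() hook typically clamps the stream limits to
 * whatever the server will accept and calls netfs_prepare_write_failed() if
 * the subrequest cannot be set up, e.g.:
 *
 *	static void myfs_prepare_write(struct netfs_io_subrequest *subreq)
 *	{
 *		struct netfs_io_stream *stream =
 *			&subreq->rreq->io_streams[subreq->stream_nr];
 *
 *		stream->sreq_max_len = myfs_negotiated_wsize(subreq->rreq);
 *		if (myfs_reserve_credits(subreq) < 0)
 *			netfs_prepare_write_failed(subreq);
 *	}
 *
 * myfs_negotiated_wsize() and myfs_reserve_credits() are hypothetical.
 */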

/*
 * Prepare a write subrequest.  We need to allocate a new subrequest
 * if we don't have one.
 */
static void netfs_prepare_write(struct netfs_io_request *wreq,
				struct netfs_io_stream *stream,
				loff_t start)
{
	struct netfs_io_subrequest *subreq;
	struct iov_iter *wreq_iter = &wreq->buffer.iter;

	/* Make sure we don't point the iterator at a used-up folio_queue
	 * struct being used as a placeholder to prevent the queue from
	 * collapsing.  In such a case, extend the queue.
	 */
	if (iov_iter_is_folioq(wreq_iter) &&
	    wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq))
		rolling_buffer_make_space(&wreq->buffer);

	subreq = netfs_alloc_subrequest(wreq);
	subreq->source = stream->source;
	subreq->start = start;
	subreq->stream_nr = stream->stream_nr;
	subreq->io_iter = *wreq_iter;

	_enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);

	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);

	stream->sreq_max_len = UINT_MAX;
	stream->sreq_max_segs = INT_MAX;
	switch (stream->source) {
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload);
		stream->sreq_max_len = wreq->wsize;
		break;
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	if (stream->prepare_write)
		stream->prepare_write(subreq);

	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);

	/* We add to the end of the list whilst the collector may be walking
	 * the list.  The collector only goes nextwards and uses the lock to
	 * remove entries off of the front.
	 */
	spin_lock(&wreq->lock);
	list_add_tail(&subreq->rreq_link, &stream->subrequests);
	if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
		stream->front = subreq;
		if (!stream->active) {
			stream->collected_to = stream->front->start;
			/* Write list pointers before active flag */
			smp_store_release(&stream->active, true);
		}
	}

	spin_unlock(&wreq->lock);

	stream->construct = subreq;
}

/*
 * Set the I/O iterator for the filesystem/cache to use and dispatch the I/O
 * operation.  The operation may be asynchronous and should call
 * netfs_write_subrequest_terminated() when complete.
 */
static void netfs_do_issue_write(struct netfs_io_stream *stream,
				 struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;

	_enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);

	if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
		return netfs_write_subrequest_terminated(subreq, subreq->error, false);

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
	stream->issue_write(subreq);
}

void netfs_reissue_write(struct netfs_io_stream *stream,
			 struct netfs_io_subrequest *subreq,
			 struct iov_iter *source)
{
	size_t size = subreq->len - subreq->transferred;

	// TODO: Use encrypted buffer
	subreq->io_iter = *source;
	iov_iter_advance(source, size);
	iov_iter_truncate(&subreq->io_iter, size);

	subreq->retry_count++;
	__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
	netfs_do_issue_write(stream, subreq);
}

void netfs_issue_write(struct netfs_io_request *wreq,
		       struct netfs_io_stream *stream)
{
	struct netfs_io_subrequest *subreq = stream->construct;

	if (!subreq)
		return;
	stream->construct = NULL;
	subreq->io_iter.count = subreq->len;
	netfs_do_issue_write(stream, subreq);
}
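
/*
 * Illustrative sketch (an assumption): as noted above netfs_do_issue_write(),
 * the ->issue_write() hook transmits subreq->io_iter (subreq->len bytes at
 * subreq->start) and reports the outcome, possibly from an asynchronous
 * completion path, with netfs_write_subrequest_terminated(), e.g.:
 *
 *	static void myfs_issue_write(struct netfs_io_subrequest *subreq)
 *	{
 *		ssize_t ret = myfs_send_data(subreq->rreq, &subreq->io_iter,
 *					     subreq->start, subreq->len);
 *
 *		netfs_write_subrequest_terminated(subreq, ret, false);
 *	}
 *
 * myfs_send_data() is hypothetical; a real filesystem would normally complete
 * the subrequest from its I/O completion handler rather than inline.
 */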

/*
 * Add data to the write subrequest, dispatching each as we fill it up or if
 * it is discontiguous with the previous.  We only fill one part at a time so
 * that we can avoid overrunning the credits obtained (cifs) and try to
 * parallelise content-crypto preparation with network writes.
 */
size_t netfs_advance_write(struct netfs_io_request *wreq,
			   struct netfs_io_stream *stream,
			   loff_t start, size_t len, bool to_eof)
{
	struct netfs_io_subrequest *subreq = stream->construct;
	size_t part;

	if (!stream->avail) {
		_leave("no write");
		return len;
	}

	_enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);

	if (subreq && start != subreq->start + subreq->len) {
		netfs_issue_write(wreq, stream);
		subreq = NULL;
	}

	if (!stream->construct)
		netfs_prepare_write(wreq, stream, start);
	subreq = stream->construct;

	part = umin(stream->sreq_max_len - subreq->len, len);
	_debug("part %zx/%zx %zx/%zx", subreq->len, stream->sreq_max_len, part, len);
	subreq->len += part;
	subreq->nr_segs++;
	stream->submit_extendable_to -= part;

	if (subreq->len >= stream->sreq_max_len ||
	    subreq->nr_segs >= stream->sreq_max_segs ||
	    to_eof) {
		netfs_issue_write(wreq, stream);
		subreq = NULL;
	}

	return part;
}
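
/*
 * Worked example (an illustrative assumption): if ->prepare_write() clamps
 * stream->sreq_max_len to a 16KiB wsize, then writing back a 64KiB dirty
 * folio drives netfs_advance_write() four times for the upload stream, each
 * call filling and issuing one 16KiB subrequest before the next is prepared.
 */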

/*
 * Write some of a pending folio's data back to the server.
 */
static int netfs_write_folio(struct netfs_io_request *wreq,
			     struct writeback_control *wbc,
			     struct folio *folio)
{
	struct netfs_io_stream *upload = &wreq->io_streams[0];
	struct netfs_io_stream *cache = &wreq->io_streams[1];
	struct netfs_io_stream *stream;
	struct netfs_group *fgroup; /* TODO: Use this with ceph */
	struct netfs_folio *finfo;
	size_t iter_off = 0;
	size_t fsize = folio_size(folio), flen = fsize, foff = 0;
	loff_t fpos = folio_pos(folio), i_size;
	bool to_eof = false, streamw = false;
	bool debug = false;

	_enter("");

	if (rolling_buffer_make_space(&wreq->buffer) < 0)
		return -ENOMEM;

	/* netfs_perform_write() may shift i_size around the page or from out
	 * of the page to beyond it, but cannot move i_size into or through the
	 * page since we have it locked.
	 */
	i_size = i_size_read(wreq->inode);

	if (fpos >= i_size) {
		/* mmap beyond eof. */
		_debug("beyond eof");
		folio_start_writeback(folio);
		folio_unlock(folio);
		wreq->nr_group_rel += netfs_folio_written_back(folio);
		netfs_put_group_many(wreq->group, wreq->nr_group_rel);
		wreq->nr_group_rel = 0;
		return 0;
	}

	if (fpos + fsize > wreq->i_size)
		wreq->i_size = i_size;

	fgroup = netfs_folio_group(folio);
	finfo = netfs_folio_info(folio);
	if (finfo) {
		foff = finfo->dirty_offset;
		flen = foff + finfo->dirty_len;
		streamw = true;
	}

	if (wreq->origin == NETFS_WRITETHROUGH) {
		to_eof = false;
		if (flen > i_size - fpos)
			flen = i_size - fpos;
	} else if (flen > i_size - fpos) {
		flen = i_size - fpos;
		if (!streamw)
			folio_zero_segment(folio, flen, fsize);
		to_eof = true;
	} else if (flen == i_size - fpos) {
		to_eof = true;
	}
	flen -= foff;

	_debug("folio %zx %zx %zx", foff, flen, fsize);

	/* Deal with discontinuities in the stream of dirty pages.  These can
	 * arise from a number of sources:
	 *
	 * (1) Intervening non-dirty pages from random-access writes, multiple
	 *     flushers writing back different parts simultaneously and manual
	 *     syncing.
	 *
	 * (2) Partially-written pages from write-streaming.
	 *
	 * (3) Pages that belong to a different write-back group (eg. Ceph
	 *     snapshots).
	 *
	 * (4) Actually-clean pages that were marked for write to the cache
	 *     when they were read.  Note that these appear as a special
	 *     write-back group.
	 */
	if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
		netfs_issue_write(wreq, upload);
	} else if (fgroup != wreq->group) {
		/* We can't write this page to the server yet. */
		kdebug("wrong group");
		folio_redirty_for_writepage(wbc, folio);
		folio_unlock(folio);
		netfs_issue_write(wreq, upload);
		netfs_issue_write(wreq, cache);
		return 0;
	}

	if (foff > 0)
		netfs_issue_write(wreq, upload);
	if (streamw)
		netfs_issue_write(wreq, cache);

	/* Flip the page to the writeback state and unlock.  If we're called
	 * from write-through, then the page has already been put into the wb
	 * state.
	 */
	if (wreq->origin == NETFS_WRITEBACK)
		folio_start_writeback(folio);
	folio_unlock(folio);

	if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
		if (!cache->avail) {
			trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
			netfs_issue_write(wreq, upload);
			netfs_folio_written_back(folio);
			return 0;
		}
		trace_netfs_folio(folio, netfs_folio_trace_store_copy);
	} else if (!upload->avail && !cache->avail) {
		trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
		netfs_folio_written_back(folio);
		return 0;
	} else if (!upload->construct) {
		trace_netfs_folio(folio, netfs_folio_trace_store);
	} else {
		trace_netfs_folio(folio, netfs_folio_trace_store_plus);
	}

	/* Attach the folio to the rolling buffer. */
	rolling_buffer_append(&wreq->buffer, folio, 0);

	/* Move the submission point forward to allow for write-streaming data
	 * not starting at the front of the page.  We don't do write-streaming
	 * with the cache as the cache requires DIO alignment.
	 *
	 * Also skip uploading for data that's been read and just needs copying
	 * to the cache.
	 */
	for (int s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		stream->submit_off = foff;
		stream->submit_len = flen;
		if (!stream->avail ||
		    (stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
		    (stream->source == NETFS_UPLOAD_TO_SERVER &&
		     fgroup == NETFS_FOLIO_COPY_TO_CACHE)) {
			stream->submit_off = UINT_MAX;
			stream->submit_len = 0;
		}
	}

	/* Attach the folio to one or more subrequests.  For a big folio, we
	 * could end up with thousands of subrequests if the wsize is small -
	 * but we might need to wait during the creation of subrequests for
	 * network resources (eg. SMB credits).
	 */
	for (;;) {
		ssize_t part;
		size_t lowest_off = ULONG_MAX;
		int choose_s = -1;

		/* Always add to the lowest-submitted stream first. */
		for (int s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->submit_len > 0 &&
			    stream->submit_off < lowest_off) {
				lowest_off = stream->submit_off;
				choose_s = s;
			}
		}

		if (choose_s < 0)
			break;
		stream = &wreq->io_streams[choose_s];

		/* Advance the iterator(s). */
		if (stream->submit_off > iter_off) {
			rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
			iter_off = stream->submit_off;
		}

		atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
		stream->submit_extendable_to = fsize - stream->submit_off;
		part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
					   stream->submit_len, to_eof);
		stream->submit_off += part;
		if (part > stream->submit_len)
			stream->submit_len = 0;
		else
			stream->submit_len -= part;
		if (part > 0)
			debug = true;
	}

	if (fsize > iter_off)
		rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
	atomic64_set(&wreq->issued_to, fpos + fsize);

	if (!debug)
		kdebug("R=%x: No submit", wreq->debug_id);

	if (foff + flen < fsize)
		for (int s = 0; s < NR_IO_STREAMS; s++)
			netfs_issue_write(wreq, &wreq->io_streams[s]);

	_leave(" = 0");
	return 0;
}

/*
 * End the issuing of writes, letting the collector know we're done.
 */
static void netfs_end_issue_write(struct netfs_io_request *wreq)
{
	bool needs_poke = true;

	smp_wmb(); /* Write subreq lists before ALL_QUEUED. */
	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);

	for (int s = 0; s < NR_IO_STREAMS; s++) {
		struct netfs_io_stream *stream = &wreq->io_streams[s];

		if (!stream->active)
			continue;
		if (!list_empty(&stream->subrequests))
			needs_poke = false;
		netfs_issue_write(wreq, stream);
	}

	if (needs_poke)
		netfs_wake_write_collector(wreq, false);
}
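
/*
 * Illustrative sketch (an assumption): netfs_writepages() below is intended
 * to be plugged directly into a filesystem's address_space_operations, e.g.:
 *
 *	static const struct address_space_operations myfs_aops = {
 *		...
 *		.writepages	= netfs_writepages,
 *	};
 */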

/*
 * Write some of the pending data back to the server
 */
int netfs_writepages(struct address_space *mapping,
		     struct writeback_control *wbc)
{
	struct netfs_inode *ictx = netfs_inode(mapping->host);
	struct netfs_io_request *wreq = NULL;
	struct folio *folio;
	int error = 0;

	if (!mutex_trylock(&ictx->wb_lock)) {
		if (wbc->sync_mode == WB_SYNC_NONE) {
			netfs_stat(&netfs_n_wb_lock_skip);
			return 0;
		}
		netfs_stat(&netfs_n_wb_lock_wait);
		mutex_lock(&ictx->wb_lock);
	}

	/* Need the first folio to be able to set up the op. */
	folio = writeback_iter(mapping, wbc, NULL, &error);
	if (!folio)
		goto out;

	wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
	if (IS_ERR(wreq)) {
		error = PTR_ERR(wreq);
		goto couldnt_start;
	}

	trace_netfs_write(wreq, netfs_write_trace_writeback);
	netfs_stat(&netfs_n_wh_writepages);

	do {
		_debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));

		/* It appears we don't have to handle cyclic writeback wrapping. */
		WARN_ON_ONCE(wreq && folio_pos(folio) < atomic64_read(&wreq->issued_to));

		if (netfs_folio_group(folio) != NETFS_FOLIO_COPY_TO_CACHE &&
		    unlikely(!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))) {
			set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
			wreq->netfs_ops->begin_writeback(wreq);
		}

		error = netfs_write_folio(wreq, wbc, folio);
		if (error < 0)
			break;
	} while ((folio = writeback_iter(mapping, wbc, folio, &error)));

	netfs_end_issue_write(wreq);

	mutex_unlock(&ictx->wb_lock);

	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
	_leave(" = %d", error);
	return error;

couldnt_start:
	netfs_kill_dirty_pages(mapping, wbc, folio);
out:
	mutex_unlock(&ictx->wb_lock);
	_leave(" = %d", error);
	return error;
}
EXPORT_SYMBOL(netfs_writepages);

/*
 * Begin a write operation for writing through the pagecache.
 */
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
{
	struct netfs_io_request *wreq = NULL;
	struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp));

	mutex_lock(&ictx->wb_lock);

	wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp,
				      iocb->ki_pos, NETFS_WRITETHROUGH);
	if (IS_ERR(wreq)) {
		mutex_unlock(&ictx->wb_lock);
		return wreq;
	}

	wreq->io_streams[0].avail = true;
	trace_netfs_write(wreq, netfs_write_trace_writethrough);
	return wreq;
}

/*
 * Advance the state of the write operation used when writing through the
 * pagecache.  Data has been copied into the pagecache that we need to append
 * to the request.  If we've added more than wsize then we need to create a new
 * subrequest.
 */
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
			       struct folio *folio, size_t copied, bool to_page_end,
			       struct folio **writethrough_cache)
{
	_enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
	       wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end);

	if (!*writethrough_cache) {
		if (folio_test_dirty(folio))
			/* Sigh.  mmap. */
			folio_clear_dirty_for_io(folio);

		/* We can make multiple writes to the folio... */
		folio_start_writeback(folio);
		if (wreq->len == 0)
			trace_netfs_folio(folio, netfs_folio_trace_wthru);
		else
			trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
		*writethrough_cache = folio;
	}

	wreq->len += copied;
	if (!to_page_end)
		return 0;

	*writethrough_cache = NULL;
	return netfs_write_folio(wreq, wbc, folio);
}
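
/*
 * Illustrative call sequence (an assumption based on the API above): the
 * buffered write path drives write-through roughly as follows:
 *
 *	wreq = netfs_begin_writethrough(iocb, len);
 *	while (<data remains to be copied>) {
 *		<copy a chunk into the pagecache folio>
 *		netfs_advance_writethrough(wreq, wbc, folio, copied,
 *					   to_page_end, &writethrough_cache);
 *	}
 *	ret = netfs_end_writethrough(wreq, wbc, writethrough_cache);
 */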

/*
 * End a write operation used when writing through the pagecache.
 */
int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
			   struct folio *writethrough_cache)
{
	struct netfs_inode *ictx = netfs_inode(wreq->inode);
	int ret;

	_enter("R=%x", wreq->debug_id);

	if (writethrough_cache)
		netfs_write_folio(wreq, wbc, writethrough_cache);

	netfs_end_issue_write(wreq);

	mutex_unlock(&ictx->wb_lock);

	if (wreq->iocb) {
		ret = -EIOCBQUEUED;
	} else {
		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
		ret = wreq->error;
	}
	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
	return ret;
}

/*
 * Write data to the server without going through the pagecache and without
 * writing it to the local cache.
 */
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len)
{
	struct netfs_io_stream *upload = &wreq->io_streams[0];
	ssize_t part;
	loff_t start = wreq->start;
	int error = 0;

	_enter("%zx", len);

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_begin(wreq->inode);

	while (len) {
		// TODO: Prepare content encryption

		_debug("unbuffered %zx", len);
		part = netfs_advance_write(wreq, upload, start, len, false);
		start += part;
		len -= part;
		rolling_buffer_advance(&wreq->buffer, part);
		if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
			trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause);
			wait_event(wreq->waitq, !test_bit(NETFS_RREQ_PAUSE, &wreq->flags));
		}
		if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
			break;
	}

	netfs_end_issue_write(wreq);
	_leave(" = %d", error);
	return error;
}

/*
 * Write some of a pending folio's data back to the server and/or the cache.
 */
static int netfs_write_folio_single(struct netfs_io_request *wreq,
				    struct folio *folio)
{
	struct netfs_io_stream *upload = &wreq->io_streams[0];
	struct netfs_io_stream *cache = &wreq->io_streams[1];
	struct netfs_io_stream *stream;
	size_t iter_off = 0;
	size_t fsize = folio_size(folio), flen;
	loff_t fpos = folio_pos(folio);
	bool to_eof = false;
	bool no_debug = false;

	_enter("");

	flen = folio_size(folio);
	if (flen > wreq->i_size - fpos) {
		flen = wreq->i_size - fpos;
		folio_zero_segment(folio, flen, fsize);
		to_eof = true;
	} else if (flen == wreq->i_size - fpos) {
		to_eof = true;
	}

	_debug("folio %zx/%zx", flen, fsize);

	if (!upload->avail && !cache->avail) {
		trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
		return 0;
	}

	if (!upload->construct)
		trace_netfs_folio(folio, netfs_folio_trace_store);
	else
		trace_netfs_folio(folio, netfs_folio_trace_store_plus);

	/* Attach the folio to the rolling buffer. */
	folio_get(folio);
	rolling_buffer_append(&wreq->buffer, folio, NETFS_ROLLBUF_PUT_MARK);

	/* Move the submission point forward to allow for write-streaming data
	 * not starting at the front of the page.  We don't do write-streaming
	 * with the cache as the cache requires DIO alignment.
	 *
	 * Also skip uploading for data that's been read and just needs copying
	 * to the cache.
	 */
	for (int s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		stream->submit_off = 0;
		stream->submit_len = flen;
		if (!stream->avail) {
			stream->submit_off = UINT_MAX;
			stream->submit_len = 0;
		}
	}

	/* Attach the folio to one or more subrequests.  For a big folio, we
	 * could end up with thousands of subrequests if the wsize is small -
	 * but we might need to wait during the creation of subrequests for
	 * network resources (eg. SMB credits).
	 */
	for (;;) {
		ssize_t part;
		size_t lowest_off = ULONG_MAX;
		int choose_s = -1;

		/* Always add to the lowest-submitted stream first. */
		for (int s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->submit_len > 0 &&
			    stream->submit_off < lowest_off) {
				lowest_off = stream->submit_off;
				choose_s = s;
			}
		}

		if (choose_s < 0)
			break;
		stream = &wreq->io_streams[choose_s];

		/* Advance the iterator(s). */
		if (stream->submit_off > iter_off) {
			rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
			iter_off = stream->submit_off;
		}

		atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
		stream->submit_extendable_to = fsize - stream->submit_off;
		part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
					   stream->submit_len, to_eof);
		stream->submit_off += part;
		if (part > stream->submit_len)
			stream->submit_len = 0;
		else
			stream->submit_len -= part;
		if (part > 0)
			no_debug = true;
	}

	wreq->buffer.iter.iov_offset = 0;
	if (fsize > iter_off)
		rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
	atomic64_set(&wreq->issued_to, fpos + fsize);

	if (!no_debug)
		kdebug("R=%x: No submit", wreq->debug_id);
	_leave(" = 0");
	return 0;
}
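
/*
 * Illustrative sketch (an assumption, not taken from this file): a caller of
 * netfs_writeback_single() below supplies an ITER_FOLIOQ iterator spanning
 * the monolithic object, e.g.:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_folio_queue(&iter, ITER_SOURCE, myfs_object_fq, 0, 0, size);
 *	ret = netfs_writeback_single(mapping, wbc, &iter);
 *
 * myfs_object_fq is a hypothetical folio_queue holding the object's contents.
 */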

/**
 * netfs_writeback_single - Write back a monolithic payload
 * @mapping: The mapping to write from
 * @wbc: Hints from the VM
 * @iter: Data to write, must be ITER_FOLIOQ.
 *
 * Write a monolithic, non-pagecache object back to the server and/or
 * the cache.
 */
int netfs_writeback_single(struct address_space *mapping,
			   struct writeback_control *wbc,
			   struct iov_iter *iter)
{
	struct netfs_io_request *wreq;
	struct netfs_inode *ictx = netfs_inode(mapping->host);
	struct folio_queue *fq;
	size_t size = iov_iter_count(iter);
	int ret;

	if (WARN_ON_ONCE(!iov_iter_is_folioq(iter)))
		return -EIO;

	if (!mutex_trylock(&ictx->wb_lock)) {
		if (wbc->sync_mode == WB_SYNC_NONE) {
			netfs_stat(&netfs_n_wb_lock_skip);
			return 0;
		}
		netfs_stat(&netfs_n_wb_lock_wait);
		mutex_lock(&ictx->wb_lock);
	}

	wreq = netfs_create_write_req(mapping, NULL, 0, NETFS_WRITEBACK_SINGLE);
	if (IS_ERR(wreq)) {
		ret = PTR_ERR(wreq);
		goto couldnt_start;
	}

	trace_netfs_write(wreq, netfs_write_trace_writeback);
	netfs_stat(&netfs_n_wh_writepages);

	if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
		wreq->netfs_ops->begin_writeback(wreq);

	for (fq = (struct folio_queue *)iter->folioq; fq; fq = fq->next) {
		for (int slot = 0; slot < folioq_count(fq); slot++) {
			struct folio *folio = folioq_folio(fq, slot);
			size_t part = umin(folioq_folio_size(fq, slot), size);

			_debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));

			ret = netfs_write_folio_single(wreq, folio);
			if (ret < 0)
				goto stop;
			size -= part;
			if (size <= 0)
				goto stop;
		}
	}

stop:
	for (int s = 0; s < NR_IO_STREAMS; s++)
		netfs_issue_write(wreq, &wreq->io_streams[s]);
	smp_wmb(); /* Write lists before ALL_QUEUED. */
	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);

	mutex_unlock(&ictx->wb_lock);

	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
	_leave(" = %d", ret);
	return ret;

couldnt_start:
	mutex_unlock(&ictx->wb_lock);
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_writeback_single);