// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level (buffered) writeback.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * To support network filesystems with local caching, we manage a situation
 * that can be envisioned like the following:
 *
 *                 +---+---+-----+-----+---+----------+
 *      Folios:    |   |   |     |     |   |          |
 *                 +---+---+-----+-----+---+----------+
 *
 *                   +------+------+     +----+----+
 *      Upload:      |      |      |.....|    |    |
 *  (Stream 0)       +------+------+     +----+----+
 *
 *                 +------+------+------+------+------+
 *       Cache:    |      |      |      |      |      |
 *  (Stream 1)     +------+------+------+------+------+
 *
 * Where we have a sequence of folios of varying sizes that we need to overlay
 * with multiple parallel streams of I/O requests, where the I/O requests in a
 * stream may also be of various sizes (in cifs, for example, the sizes are
 * negotiated with the server; in something like ceph, they may represent the
 * sizes of storage objects).
 *
 * The sequence in each stream may contain gaps, and noncontiguous subrequests
 * may be glued together into single vectored write RPCs.
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include "internal.h"

/*
 * Kill all dirty folios in the event of an unrecoverable error, starting with
 * a locked folio we've already obtained from writeback_iter().
 */
static void netfs_kill_dirty_pages(struct address_space *mapping,
				   struct writeback_control *wbc,
				   struct folio *folio)
{
	int error = 0;

	do {
		enum netfs_folio_trace why = netfs_folio_trace_kill;
		struct netfs_group *group = NULL;
		struct netfs_folio *finfo = NULL;
		void *priv;

		priv = folio_detach_private(folio);
		if (priv) {
			finfo = __netfs_folio_info(priv);
			if (finfo) {
				/* Kill folio from streaming write. */
				group = finfo->netfs_group;
				why = netfs_folio_trace_kill_s;
			} else {
				group = priv;
				if (group == NETFS_FOLIO_COPY_TO_CACHE) {
					/* Kill copy-to-cache folio */
					why = netfs_folio_trace_kill_cc;
					group = NULL;
				} else {
					/* Kill folio with group */
					why = netfs_folio_trace_kill_g;
				}
			}
		}

		trace_netfs_folio(folio, why);

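		/* Pass the folio through the writeback state to keep the
		 * accounting straight and wake anyone waiting on its
		 * writeback flag.
		 */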
		folio_start_writeback(folio);
		folio_unlock(folio);
		folio_end_writeback(folio);

		netfs_put_group(group);
		kfree(finfo);

	} while ((folio = writeback_iter(mapping, wbc, folio, &error)));
}

/*
 * Create a write request and set it up appropriately for the origin type.
 */
struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
						struct file *file,
						loff_t start,
						enum netfs_io_origin origin)
{
	struct netfs_io_request *wreq;
	struct netfs_inode *ictx;
	bool is_cacheable = (origin == NETFS_WRITEBACK ||
			     origin == NETFS_WRITEBACK_SINGLE ||
			     origin == NETFS_WRITETHROUGH ||
			     origin == NETFS_PGPRIV2_COPY_TO_CACHE);

	wreq = netfs_alloc_request(mapping, file, start, 0, origin);
	if (IS_ERR(wreq))
		return wreq;

	_enter("R=%x", wreq->debug_id);

	ictx = netfs_inode(wreq->inode);
	if (is_cacheable && netfs_is_cache_enabled(ictx))
		fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
	if (rolling_buffer_init(&wreq->buffer, wreq->debug_id, ITER_SOURCE) < 0)
		goto nomem;

	wreq->cleaned_to = wreq->start;

	wreq->io_streams[0].stream_nr = 0;
	wreq->io_streams[0].source = NETFS_UPLOAD_TO_SERVER;
	wreq->io_streams[0].prepare_write = ictx->ops->prepare_write;
	wreq->io_streams[0].issue_write = ictx->ops->issue_write;
	wreq->io_streams[0].collected_to = start;
	wreq->io_streams[0].transferred = 0;

	wreq->io_streams[1].stream_nr = 1;
	wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE;
	wreq->io_streams[1].collected_to = start;
	wreq->io_streams[1].transferred = 0;
	if (fscache_resources_valid(&wreq->cache_resources)) {
		wreq->io_streams[1].avail = true;
		wreq->io_streams[1].active = true;
		wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq;
		wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write;
	}

	return wreq;
nomem:
	netfs_put_failed_request(wreq);
	return ERR_PTR(-ENOMEM);
}
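
/*
 * Note that the two streams set up above correspond to the "Upload" (stream
 * 0) and "Cache" (stream 1) rows in the diagram at the top of this file; the
 * cache stream is only marked available and active if a cache write
 * operation could actually be begun on the request's cache resources.
 */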

/**
 * netfs_prepare_write_failed - Note write preparation failed
 * @subreq: The subrequest to mark
 *
 * Mark a subrequest to note that preparation for write failed.
 */
void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq)
{
	__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
	trace_netfs_sreq(subreq, netfs_sreq_trace_prep_failed);
}
EXPORT_SYMBOL(netfs_prepare_write_failed);
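
/*
 * For illustration, a filesystem's ->prepare_write() hook might use this as
 * follows (a minimal sketch; the myfs_* names and error code are
 * hypothetical):
 *
 *	static void myfs_prepare_write(struct netfs_io_subrequest *subreq)
 *	{
 *		if (myfs_get_credits(subreq) < 0) {
 *			subreq->error = -ENOBUFS;
 *			netfs_prepare_write_failed(subreq);
 *		}
 *	}
 */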

/*
 * Prepare a write subrequest. We need to allocate a new subrequest
 * if we don't have one.
 */
static void netfs_prepare_write(struct netfs_io_request *wreq,
				struct netfs_io_stream *stream,
				loff_t start)
{
	struct netfs_io_subrequest *subreq;
	struct iov_iter *wreq_iter = &wreq->buffer.iter;

	/* Make sure we don't point the iterator at a used-up folio_queue
	 * struct being used as a placeholder to prevent the queue from
	 * collapsing. In such a case, extend the queue.
	 */
	if (iov_iter_is_folioq(wreq_iter) &&
	    wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq))
		rolling_buffer_make_space(&wreq->buffer);

	subreq = netfs_alloc_subrequest(wreq);
	subreq->source = stream->source;
	subreq->start = start;
	subreq->stream_nr = stream->stream_nr;
	subreq->io_iter = *wreq_iter;

	_enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);

	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);

	stream->sreq_max_len = UINT_MAX;
	stream->sreq_max_segs = INT_MAX;
	switch (stream->source) {
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload);
		stream->sreq_max_len = wreq->wsize;
		break;
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}

	if (stream->prepare_write)
		stream->prepare_write(subreq);

	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);

	/* We add to the end of the list whilst the collector may be walking
	 * the list. The collector only goes nextwards and uses the lock to
	 * remove entries off of the front.
	 */
	spin_lock(&wreq->lock);
	list_add_tail(&subreq->rreq_link, &stream->subrequests);
	if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
		stream->front = subreq;
		if (!stream->active) {
			stream->collected_to = stream->front->start;
			/* Write list pointers before active flag */
			smp_store_release(&stream->active, true);
		}
	}

	spin_unlock(&wreq->lock);

	stream->construct = subreq;
}

/*
 * Set the I/O iterator for the filesystem/cache to use and dispatch the I/O
 * operation. The operation may be asynchronous and should call
 * netfs_write_subrequest_terminated() when complete.
 */
static void netfs_do_issue_write(struct netfs_io_stream *stream,
				 struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;

	_enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);

	if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
		return netfs_write_subrequest_terminated(subreq, subreq->error);

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
	stream->issue_write(subreq);
}

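/*
 * Reissue a previously issued write subrequest, resetting its iterator from
 * @source to cover the part of the data that remains untransferred and
 * bumping the retry count.
 */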
void netfs_reissue_write(struct netfs_io_stream *stream,
			 struct netfs_io_subrequest *subreq,
			 struct iov_iter *source)
{
	size_t size = subreq->len - subreq->transferred;

	// TODO: Use encrypted buffer
	subreq->io_iter = *source;
	iov_iter_advance(source, size);
	iov_iter_truncate(&subreq->io_iter, size);

	subreq->retry_count++;
	__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
	netfs_stat(&netfs_n_wh_retry_write_subreq);
	netfs_do_issue_write(stream, subreq);
}

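/*
 * Dispatch the subrequest currently under construction on a stream, if there
 * is one, closing it to further additions.
 */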
void netfs_issue_write(struct netfs_io_request *wreq,
		       struct netfs_io_stream *stream)
{
	struct netfs_io_subrequest *subreq = stream->construct;

	if (!subreq)
		return;
	stream->construct = NULL;
	subreq->io_iter.count = subreq->len;
	netfs_do_issue_write(stream, subreq);
}

/*
 * Add data to the write subrequest, dispatching each one as we fill it up or
 * when it becomes discontiguous with the previous one. We only fill one part
 * at a time so that we can avoid overrunning the credits obtained (cifs) and
 * try to parallelise content-crypto preparation with network writes.
 */
size_t netfs_advance_write(struct netfs_io_request *wreq,
			   struct netfs_io_stream *stream,
			   loff_t start, size_t len, bool to_eof)
{
	struct netfs_io_subrequest *subreq = stream->construct;
	size_t part;

	if (!stream->avail) {
		_leave("no write");
		return len;
	}

	_enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);

	if (subreq && start != subreq->start + subreq->len) {
		netfs_issue_write(wreq, stream);
		subreq = NULL;
	}

	if (!stream->construct)
		netfs_prepare_write(wreq, stream, start);
	subreq = stream->construct;

	part = umin(stream->sreq_max_len - subreq->len, len);
	_debug("part %zx/%zx %zx/%zx", subreq->len, stream->sreq_max_len, part, len);
	subreq->len += part;
	subreq->nr_segs++;
	stream->submit_extendable_to -= part;

	if (subreq->len >= stream->sreq_max_len ||
	    subreq->nr_segs >= stream->sreq_max_segs ||
	    to_eof) {
		netfs_issue_write(wreq, stream);
		subreq = NULL;
	}

	return part;
}
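
/*
 * For example (illustrative numbers only): with a wsize of 64KiB capping
 * stream->sreq_max_len and a fully-dirty 256KiB folio, netfs_write_folio()
 * below will call the above repeatedly, constructing and issuing four 64KiB
 * upload subrequests in turn, each dispatched as it fills up.
 */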

/*
 * Write some of a pending folio's data back to the server.
 */
static int netfs_write_folio(struct netfs_io_request *wreq,
			     struct writeback_control *wbc,
			     struct folio *folio)
{
	struct netfs_io_stream *upload = &wreq->io_streams[0];
	struct netfs_io_stream *cache = &wreq->io_streams[1];
	struct netfs_io_stream *stream;
	struct netfs_group *fgroup; /* TODO: Use this with ceph */
	struct netfs_folio *finfo;
	size_t iter_off = 0;
	size_t fsize = folio_size(folio), flen = fsize, foff = 0;
	loff_t fpos = folio_pos(folio), i_size;
	bool to_eof = false, streamw = false;
	bool debug = false;

	_enter("");

	if (rolling_buffer_make_space(&wreq->buffer) < 0)
		return -ENOMEM;

	/* netfs_perform_write() may shift i_size around the page or from out
	 * of the page to beyond it, but cannot move i_size into or through the
	 * page since we have it locked.
	 */
	i_size = i_size_read(wreq->inode);

	if (fpos >= i_size) {
		/* mmap beyond eof. */
		_debug("beyond eof");
		folio_start_writeback(folio);
		folio_unlock(folio);
		wreq->nr_group_rel += netfs_folio_written_back(folio);
		netfs_put_group_many(wreq->group, wreq->nr_group_rel);
		wreq->nr_group_rel = 0;
		return 0;
	}

	if (fpos + fsize > wreq->i_size)
		wreq->i_size = i_size;

	fgroup = netfs_folio_group(folio);
	finfo = netfs_folio_info(folio);
	if (finfo) {
		foff = finfo->dirty_offset;
		flen = foff + finfo->dirty_len;
		streamw = true;
	}

	if (wreq->origin == NETFS_WRITETHROUGH) {
		to_eof = false;
		if (flen > i_size - fpos)
			flen = i_size - fpos;
	} else if (flen > i_size - fpos) {
		flen = i_size - fpos;
		if (!streamw)
			folio_zero_segment(folio, flen, fsize);
		to_eof = true;
	} else if (flen == i_size - fpos) {
		to_eof = true;
	}
	flen -= foff;

	_debug("folio %zx %zx %zx", foff, flen, fsize);

	/* Deal with discontinuities in the stream of dirty pages. These can
	 * arise from a number of sources:
	 *
	 * (1) Intervening non-dirty pages from random-access writes, multiple
	 *     flushers writing back different parts simultaneously and manual
	 *     syncing.
	 *
	 * (2) Partially-written pages from write-streaming.
	 *
	 * (3) Pages that belong to a different write-back group (eg. Ceph
	 *     snapshots).
	 *
	 * (4) Actually-clean pages that were marked for write to the cache
	 *     when they were read. Note that these appear as a special
	 *     write-back group.
	 */
	if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
		netfs_issue_write(wreq, upload);
	} else if (fgroup != wreq->group) {
		/* We can't write this page to the server yet. */
		kdebug("wrong group");
		folio_redirty_for_writepage(wbc, folio);
		folio_unlock(folio);
		netfs_issue_write(wreq, upload);
		netfs_issue_write(wreq, cache);
		return 0;
	}

	if (foff > 0)
		netfs_issue_write(wreq, upload);
	if (streamw)
		netfs_issue_write(wreq, cache);

	/* Flip the page to the writeback state and unlock. If we're called
	 * from write-through, then the page has already been put into the wb
	 * state.
	 */
	if (wreq->origin == NETFS_WRITEBACK)
		folio_start_writeback(folio);
	folio_unlock(folio);

	if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
		if (!cache->avail) {
			trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
			netfs_issue_write(wreq, upload);
			netfs_folio_written_back(folio);
			return 0;
		}
		trace_netfs_folio(folio, netfs_folio_trace_store_copy);
	} else if (!upload->avail && !cache->avail) {
		trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
		netfs_folio_written_back(folio);
		return 0;
	} else if (!upload->construct) {
		trace_netfs_folio(folio, netfs_folio_trace_store);
	} else {
		trace_netfs_folio(folio, netfs_folio_trace_store_plus);
	}

	/* Attach the folio to the rolling buffer. */
	rolling_buffer_append(&wreq->buffer, folio, 0);

	/* Move the submission point forward to allow for write-streaming data
	 * not starting at the front of the page. We don't do write-streaming
	 * with the cache as the cache requires DIO alignment.
	 *
	 * Also skip uploading for data that's been read and just needs copying
	 * to the cache.
	 */
	for (int s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		stream->submit_off = foff;
		stream->submit_len = flen;
		if (!stream->avail ||
		    (stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
		    (stream->source == NETFS_UPLOAD_TO_SERVER &&
		     fgroup == NETFS_FOLIO_COPY_TO_CACHE)) {
			stream->submit_off = UINT_MAX;
			stream->submit_len = 0;
		}
	}

	/* Attach the folio to one or more subrequests. For a big folio, we
	 * could end up with thousands of subrequests if the wsize is small -
	 * but we might need to wait during the creation of subrequests for
	 * network resources (eg. SMB credits).
	 */
	for (;;) {
		ssize_t part;
		size_t lowest_off = ULONG_MAX;
		int choose_s = -1;

		/* Always add to the lowest-submitted stream first. */
		for (int s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->submit_len > 0 &&
			    stream->submit_off < lowest_off) {
				lowest_off = stream->submit_off;
				choose_s = s;
			}
		}

		if (choose_s < 0)
			break;
		stream = &wreq->io_streams[choose_s];

		/* Advance the iterator(s). */
		if (stream->submit_off > iter_off) {
			rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
			iter_off = stream->submit_off;
		}

		atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
		stream->submit_extendable_to = fsize - stream->submit_off;
		part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
					   stream->submit_len, to_eof);
		stream->submit_off += part;
		if (part > stream->submit_len)
			stream->submit_len = 0;
		else
			stream->submit_len -= part;
		if (part > 0)
			debug = true;
	}

	if (fsize > iter_off)
		rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
	atomic64_set(&wreq->issued_to, fpos + fsize);

	if (!debug)
		kdebug("R=%x: No submit", wreq->debug_id);

	if (foff + flen < fsize)
		for (int s = 0; s < NR_IO_STREAMS; s++)
			netfs_issue_write(wreq, &wreq->io_streams[s]);

	_leave(" = 0");
	return 0;
}

/*
 * End the issuing of writes, letting the collector know we're done.
 */
static void netfs_end_issue_write(struct netfs_io_request *wreq)
{
	bool needs_poke = true;

	smp_wmb(); /* Write subreq lists before ALL_QUEUED. */
	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);

	for (int s = 0; s < NR_IO_STREAMS; s++) {
		struct netfs_io_stream *stream = &wreq->io_streams[s];

		if (!stream->active)
			continue;
		if (!list_empty(&stream->subrequests))
			needs_poke = false;
		netfs_issue_write(wreq, stream);
	}

	if (needs_poke)
		netfs_wake_collector(wreq);
}

/*
 * Write some of the pending data back to the server.
 */
int netfs_writepages(struct address_space *mapping,
		     struct writeback_control *wbc)
{
	struct netfs_inode *ictx = netfs_inode(mapping->host);
	struct netfs_io_request *wreq = NULL;
	struct folio *folio;
	int error = 0;

	if (!mutex_trylock(&ictx->wb_lock)) {
		if (wbc->sync_mode == WB_SYNC_NONE) {
			netfs_stat(&netfs_n_wb_lock_skip);
			return 0;
		}
		netfs_stat(&netfs_n_wb_lock_wait);
		mutex_lock(&ictx->wb_lock);
	}

	/* Need the first folio to be able to set up the op. */
	folio = writeback_iter(mapping, wbc, NULL, &error);
	if (!folio)
		goto out;

	wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
	if (IS_ERR(wreq)) {
		error = PTR_ERR(wreq);
		goto couldnt_start;
	}

	__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
	trace_netfs_write(wreq, netfs_write_trace_writeback);
	netfs_stat(&netfs_n_wh_writepages);

	do {
		_debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));

		/* It appears we don't have to handle cyclic writeback wrapping. */
		WARN_ON_ONCE(wreq && folio_pos(folio) < atomic64_read(&wreq->issued_to));

		if (netfs_folio_group(folio) != NETFS_FOLIO_COPY_TO_CACHE &&
		    unlikely(!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))) {
			set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
			wreq->netfs_ops->begin_writeback(wreq);
		}

		error = netfs_write_folio(wreq, wbc, folio);
		if (error < 0)
			break;
	} while ((folio = writeback_iter(mapping, wbc, folio, &error)));

	netfs_end_issue_write(wreq);

	mutex_unlock(&ictx->wb_lock);
	netfs_wake_collector(wreq);

	netfs_put_request(wreq, netfs_rreq_trace_put_return);
	_leave(" = %d", error);
	return error;

couldnt_start:
	netfs_kill_dirty_pages(mapping, wbc, folio);
out:
	mutex_unlock(&ictx->wb_lock);
	_leave(" = %d", error);
	return error;
}
EXPORT_SYMBOL(netfs_writepages);
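
/*
 * A filesystem would typically wire the above straight into its
 * address_space_operations, something like the following (a sketch; the
 * myfs name is hypothetical):
 *
 *	const struct address_space_operations myfs_aops = {
 *		.writepages	= netfs_writepages,
 *		.dirty_folio	= netfs_dirty_folio,
 *		...
 *	};
 */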

/*
 * Begin a write operation for writing through the pagecache.
 */
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
{
	struct netfs_io_request *wreq = NULL;
	struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp));

	mutex_lock(&ictx->wb_lock);

	wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp,
				      iocb->ki_pos, NETFS_WRITETHROUGH);
	if (IS_ERR(wreq)) {
		mutex_unlock(&ictx->wb_lock);
		return wreq;
	}

	wreq->io_streams[0].avail = true;
	trace_netfs_write(wreq, netfs_write_trace_writethrough);
	return wreq;
}

/*
 * Advance the state of the write operation used when writing through the
 * pagecache. Data has been copied into the pagecache that we need to append
 * to the request. If we've added more than wsize then we need to create a new
 * subrequest.
 */
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
			       struct folio *folio, size_t copied, bool to_page_end,
			       struct folio **writethrough_cache)
{
	_enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
	       wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end);

	if (!*writethrough_cache) {
		if (folio_test_dirty(folio))
			/* Sigh. mmap. */
			folio_clear_dirty_for_io(folio);

		/* We can make multiple writes to the folio... */
		folio_start_writeback(folio);
		if (wreq->len == 0)
			trace_netfs_folio(folio, netfs_folio_trace_wthru);
		else
			trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
		*writethrough_cache = folio;
	}

	wreq->len += copied;
	if (!to_page_end)
		return 0;

	*writethrough_cache = NULL;
	return netfs_write_folio(wreq, wbc, folio);
}

/*
 * End a write operation used when writing through the pagecache.
 */
ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
			       struct folio *writethrough_cache)
{
	struct netfs_inode *ictx = netfs_inode(wreq->inode);
	ssize_t ret;

	_enter("R=%x", wreq->debug_id);

	if (writethrough_cache)
		netfs_write_folio(wreq, wbc, writethrough_cache);

	netfs_end_issue_write(wreq);

	mutex_unlock(&ictx->wb_lock);

	if (wreq->iocb)
		ret = -EIOCBQUEUED;
	else
		ret = netfs_wait_for_write(wreq);
	netfs_put_request(wreq, netfs_rreq_trace_put_return);
	return ret;
}
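
/*
 * Taken together, the write-through cycle as driven by netfs_perform_write()
 * is roughly: netfs_begin_writethrough(), then netfs_advance_writethrough()
 * after each chunk is copied into the pagecache, then netfs_end_writethrough()
 * to wait for (or queue) completion.
 */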

/*
 * Write data to the server without going through the pagecache and without
 * writing it to the local cache.
 */
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len)
{
	struct netfs_io_stream *upload = &wreq->io_streams[0];
	ssize_t part;
	loff_t start = wreq->start;
	int error = 0;

	_enter("%zx", len);

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_begin(wreq->inode);

	while (len) {
		// TODO: Prepare content encryption

		_debug("unbuffered %zx", len);
		part = netfs_advance_write(wreq, upload, start, len, false);
		start += part;
		len -= part;
		rolling_buffer_advance(&wreq->buffer, part);
		if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags))
			netfs_wait_for_paused_write(wreq);
		if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
			break;
	}

	netfs_end_issue_write(wreq);
	_leave(" = %d", error);
	return error;
}

/*
 * Write a pending folio's data back to the server and/or the cache.
 */
static int netfs_write_folio_single(struct netfs_io_request *wreq,
				    struct folio *folio)
{
	struct netfs_io_stream *upload = &wreq->io_streams[0];
	struct netfs_io_stream *cache = &wreq->io_streams[1];
	struct netfs_io_stream *stream;
	size_t iter_off = 0;
	size_t fsize = folio_size(folio), flen;
	loff_t fpos = folio_pos(folio);
	bool to_eof = false;
	bool no_debug = false;

	_enter("");

	flen = folio_size(folio);
	if (flen > wreq->i_size - fpos) {
		flen = wreq->i_size - fpos;
		folio_zero_segment(folio, flen, fsize);
		to_eof = true;
	} else if (flen == wreq->i_size - fpos) {
		to_eof = true;
	}

	_debug("folio %zx/%zx", flen, fsize);

	if (!upload->avail && !cache->avail) {
		trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
		return 0;
	}

	if (!upload->construct)
		trace_netfs_folio(folio, netfs_folio_trace_store);
	else
		trace_netfs_folio(folio, netfs_folio_trace_store_plus);

	/* Attach the folio to the rolling buffer. */
	folio_get(folio);
	rolling_buffer_append(&wreq->buffer, folio, NETFS_ROLLBUF_PUT_MARK);

	/* Move the submission point forward to allow for write-streaming data
	 * not starting at the front of the page. We don't do write-streaming
	 * with the cache as the cache requires DIO alignment.
	 *
	 * Also skip uploading for data that's been read and just needs copying
	 * to the cache.
	 */
	for (int s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		stream->submit_off = 0;
		stream->submit_len = flen;
		if (!stream->avail) {
			stream->submit_off = UINT_MAX;
			stream->submit_len = 0;
		}
	}

	/* Attach the folio to one or more subrequests. For a big folio, we
	 * could end up with thousands of subrequests if the wsize is small -
	 * but we might need to wait during the creation of subrequests for
	 * network resources (eg. SMB credits).
	 */
	for (;;) {
		ssize_t part;
		size_t lowest_off = ULONG_MAX;
		int choose_s = -1;

		/* Always add to the lowest-submitted stream first. */
		for (int s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->submit_len > 0 &&
			    stream->submit_off < lowest_off) {
				lowest_off = stream->submit_off;
				choose_s = s;
			}
		}

		if (choose_s < 0)
			break;
		stream = &wreq->io_streams[choose_s];

		/* Advance the iterator(s). */
		if (stream->submit_off > iter_off) {
			rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
			iter_off = stream->submit_off;
		}

		atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
		stream->submit_extendable_to = fsize - stream->submit_off;
		part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
					   stream->submit_len, to_eof);
		stream->submit_off += part;
		if (part > stream->submit_len)
			stream->submit_len = 0;
		else
			stream->submit_len -= part;
		if (part > 0)
			no_debug = true;
	}

	wreq->buffer.iter.iov_offset = 0;
	if (fsize > iter_off)
		rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
	atomic64_set(&wreq->issued_to, fpos + fsize);

	if (!no_debug)
		kdebug("R=%x: No submit", wreq->debug_id);
	_leave(" = 0");
	return 0;
}

/**
 * netfs_writeback_single - Write back a monolithic payload
 * @mapping: The mapping to write from
 * @wbc: Hints from the VM
 * @iter: Data to write, must be ITER_FOLIOQ.
 *
 * Write a monolithic, non-pagecache object back to the server and/or
 * the cache.
 */
int netfs_writeback_single(struct address_space *mapping,
			   struct writeback_control *wbc,
			   struct iov_iter *iter)
{
	struct netfs_io_request *wreq;
	struct netfs_inode *ictx = netfs_inode(mapping->host);
	struct folio_queue *fq;
	size_t size = iov_iter_count(iter);
	int ret;

	if (WARN_ON_ONCE(!iov_iter_is_folioq(iter)))
		return -EIO;

	if (!mutex_trylock(&ictx->wb_lock)) {
		if (wbc->sync_mode == WB_SYNC_NONE) {
			netfs_stat(&netfs_n_wb_lock_skip);
			return 0;
		}
		netfs_stat(&netfs_n_wb_lock_wait);
		mutex_lock(&ictx->wb_lock);
	}

	wreq = netfs_create_write_req(mapping, NULL, 0, NETFS_WRITEBACK_SINGLE);
	if (IS_ERR(wreq)) {
		ret = PTR_ERR(wreq);
		goto couldnt_start;
	}

	__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
	trace_netfs_write(wreq, netfs_write_trace_writeback_single);
	netfs_stat(&netfs_n_wh_writepages);

	/* Call begin_writeback on the first (and only) transition of the
	 * upload flag, matching the logic in netfs_writepages().
	 */
	if (!__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
		wreq->netfs_ops->begin_writeback(wreq);

	for (fq = (struct folio_queue *)iter->folioq; fq; fq = fq->next) {
		for (int slot = 0; slot < folioq_count(fq); slot++) {
			struct folio *folio = folioq_folio(fq, slot);
			size_t part = umin(folioq_folio_size(fq, slot), size);

			_debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));

			ret = netfs_write_folio_single(wreq, folio);
			if (ret < 0)
				goto stop;
			size -= part;
			if (size == 0)
				goto stop;
		}
	}

stop:
	for (int s = 0; s < NR_IO_STREAMS; s++)
		netfs_issue_write(wreq, &wreq->io_streams[s]);
	smp_wmb(); /* Write lists before ALL_QUEUED. */
	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);

	mutex_unlock(&ictx->wb_lock);
	netfs_wake_collector(wreq);

	netfs_put_request(wreq, netfs_rreq_trace_put_return);
	_leave(" = %d", ret);
	return ret;

couldnt_start:
	mutex_unlock(&ictx->wb_lock);
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_writeback_single);
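
/*
 * This is intended for monolithic objects - e.g. a blob that the filesystem
 * keeps in its own folio queue rather than in the pagecache - which is why
 * @iter must be ITER_FOLIOQ.
 */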