1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Network filesystem high-level (buffered) writeback.
3 *
4 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 *
7 *
8 * To support network filesystems with local caching, we manage a situation
9 * that can be envisioned like the following:
10 *
11 * +---+---+-----+-----+---+----------+
12 * Folios: | | | | | | |
13 * +---+---+-----+-----+---+----------+
14 *
15 * +------+------+ +----+----+
16 * Upload: | | |.....| | |
17 * (Stream 0) +------+------+ +----+----+
18 *
19 * +------+------+------+------+------+
20 * Cache: | | | | | |
21 * (Stream 1) +------+------+------+------+------+
22 *
23 * Where we have a sequence of folios of varying sizes that we need to overlay
24 * with multiple parallel streams of I/O requests, where the I/O requests in a
25 * stream may also be of various sizes (in cifs, for example, the sizes are
26 * negotiated with the server; in something like ceph, they may represent the
27 * sizes of storage objects).
28 *
29 * The sequence in each stream may contain gaps and noncontiguous subrequests
30 * may be glued together into single vectored write RPCs.
31 */
32
33 #include <linux/export.h>
34 #include <linux/fs.h>
35 #include <linux/mm.h>
36 #include <linux/pagemap.h>
37 #include "internal.h"
38
39 /*
40 * Kill all dirty folios in the event of an unrecoverable error, starting with
41 * a locked folio we've already obtained from writeback_iter().
42 */
netfs_kill_dirty_pages(struct address_space * mapping,struct writeback_control * wbc,struct folio * folio)43 static void netfs_kill_dirty_pages(struct address_space *mapping,
44 struct writeback_control *wbc,
45 struct folio *folio)
46 {
47 int error = 0;
48
49 do {
50 enum netfs_folio_trace why = netfs_folio_trace_kill;
51 struct netfs_group *group = NULL;
52 struct netfs_folio *finfo = NULL;
53 void *priv;
54
55 priv = folio_detach_private(folio);
56 if (priv) {
57 finfo = __netfs_folio_info(priv);
58 if (finfo) {
59 /* Kill folio from streaming write. */
60 group = finfo->netfs_group;
61 why = netfs_folio_trace_kill_s;
62 } else {
63 group = priv;
64 if (group == NETFS_FOLIO_COPY_TO_CACHE) {
65 /* Kill copy-to-cache folio */
66 why = netfs_folio_trace_kill_cc;
67 group = NULL;
68 } else {
69 /* Kill folio with group */
70 why = netfs_folio_trace_kill_g;
71 }
72 }
73 }
74
75 trace_netfs_folio(folio, why);
76
77 folio_start_writeback(folio);
78 folio_unlock(folio);
79 folio_end_writeback(folio);
80
81 netfs_put_group(group);
82 kfree(finfo);
83
84 } while ((folio = writeback_iter(mapping, wbc, folio, &error)));
85 }
86
87 /*
88 * Create a write request and set it up appropriately for the origin type.
89 */
netfs_create_write_req(struct address_space * mapping,struct file * file,loff_t start,enum netfs_io_origin origin)90 struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
91 struct file *file,
92 loff_t start,
93 enum netfs_io_origin origin)
94 {
95 struct netfs_io_request *wreq;
96 struct netfs_inode *ictx;
97 bool is_cacheable = (origin == NETFS_WRITEBACK ||
98 origin == NETFS_WRITEBACK_SINGLE ||
99 origin == NETFS_WRITETHROUGH ||
100 origin == NETFS_PGPRIV2_COPY_TO_CACHE);
101
102 wreq = netfs_alloc_request(mapping, file, start, 0, origin);
103 if (IS_ERR(wreq))
104 return wreq;
105
106 _enter("R=%x", wreq->debug_id);
107
108 ictx = netfs_inode(wreq->inode);
109 if (is_cacheable && netfs_is_cache_enabled(ictx))
110 fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
111 if (rolling_buffer_init(&wreq->buffer, wreq->debug_id, ITER_SOURCE) < 0)
112 goto nomem;
113
114 wreq->cleaned_to = wreq->start;
115
116 wreq->io_streams[0].stream_nr = 0;
117 wreq->io_streams[0].source = NETFS_UPLOAD_TO_SERVER;
118 wreq->io_streams[0].prepare_write = ictx->ops->prepare_write;
119 wreq->io_streams[0].issue_write = ictx->ops->issue_write;
120 wreq->io_streams[0].collected_to = start;
121 wreq->io_streams[0].transferred = LONG_MAX;
122
123 wreq->io_streams[1].stream_nr = 1;
124 wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE;
125 wreq->io_streams[1].collected_to = start;
126 wreq->io_streams[1].transferred = LONG_MAX;
127 if (fscache_resources_valid(&wreq->cache_resources)) {
128 wreq->io_streams[1].avail = true;
129 wreq->io_streams[1].active = true;
130 wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq;
131 wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write;
132 }
133
134 return wreq;
135 nomem:
136 wreq->error = -ENOMEM;
137 netfs_put_request(wreq, false, netfs_rreq_trace_put_failed);
138 return ERR_PTR(-ENOMEM);
139 }
140
141 /**
142 * netfs_prepare_write_failed - Note write preparation failed
143 * @subreq: The subrequest to mark
144 *
145 * Mark a subrequest to note that preparation for write failed.
146 */
netfs_prepare_write_failed(struct netfs_io_subrequest * subreq)147 void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq)
148 {
149 __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
150 trace_netfs_sreq(subreq, netfs_sreq_trace_prep_failed);
151 }
152 EXPORT_SYMBOL(netfs_prepare_write_failed);
153
154 /*
155 * Prepare a write subrequest. We need to allocate a new subrequest
156 * if we don't have one.
157 */
netfs_prepare_write(struct netfs_io_request * wreq,struct netfs_io_stream * stream,loff_t start)158 static void netfs_prepare_write(struct netfs_io_request *wreq,
159 struct netfs_io_stream *stream,
160 loff_t start)
161 {
162 struct netfs_io_subrequest *subreq;
163 struct iov_iter *wreq_iter = &wreq->buffer.iter;
164
165 /* Make sure we don't point the iterator at a used-up folio_queue
166 * struct being used as a placeholder to prevent the queue from
167 * collapsing. In such a case, extend the queue.
168 */
169 if (iov_iter_is_folioq(wreq_iter) &&
170 wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq))
171 rolling_buffer_make_space(&wreq->buffer);
172
173 subreq = netfs_alloc_subrequest(wreq);
174 subreq->source = stream->source;
175 subreq->start = start;
176 subreq->stream_nr = stream->stream_nr;
177 subreq->io_iter = *wreq_iter;
178
179 _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
180
181 trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
182
183 stream->sreq_max_len = UINT_MAX;
184 stream->sreq_max_segs = INT_MAX;
185 switch (stream->source) {
186 case NETFS_UPLOAD_TO_SERVER:
187 netfs_stat(&netfs_n_wh_upload);
188 stream->sreq_max_len = wreq->wsize;
189 break;
190 case NETFS_WRITE_TO_CACHE:
191 netfs_stat(&netfs_n_wh_write);
192 break;
193 default:
194 WARN_ON_ONCE(1);
195 break;
196 }
197
198 if (stream->prepare_write)
199 stream->prepare_write(subreq);
200
201 __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
202
203 /* We add to the end of the list whilst the collector may be walking
204 * the list. The collector only goes nextwards and uses the lock to
205 * remove entries off of the front.
206 */
207 spin_lock(&wreq->lock);
208 list_add_tail(&subreq->rreq_link, &stream->subrequests);
209 if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
210 stream->front = subreq;
211 if (!stream->active) {
212 stream->collected_to = stream->front->start;
213 /* Write list pointers before active flag */
214 smp_store_release(&stream->active, true);
215 }
216 }
217
218 spin_unlock(&wreq->lock);
219
220 stream->construct = subreq;
221 }
222
223 /*
224 * Set the I/O iterator for the filesystem/cache to use and dispatch the I/O
225 * operation. The operation may be asynchronous and should call
226 * netfs_write_subrequest_terminated() when complete.
227 */
netfs_do_issue_write(struct netfs_io_stream * stream,struct netfs_io_subrequest * subreq)228 static void netfs_do_issue_write(struct netfs_io_stream *stream,
229 struct netfs_io_subrequest *subreq)
230 {
231 struct netfs_io_request *wreq = subreq->rreq;
232
233 _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
234
235 if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
236 return netfs_write_subrequest_terminated(subreq, subreq->error, false);
237
238 trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
239 stream->issue_write(subreq);
240 }
241
netfs_reissue_write(struct netfs_io_stream * stream,struct netfs_io_subrequest * subreq,struct iov_iter * source)242 void netfs_reissue_write(struct netfs_io_stream *stream,
243 struct netfs_io_subrequest *subreq,
244 struct iov_iter *source)
245 {
246 size_t size = subreq->len - subreq->transferred;
247
248 // TODO: Use encrypted buffer
249 subreq->io_iter = *source;
250 iov_iter_advance(source, size);
251 iov_iter_truncate(&subreq->io_iter, size);
252
253 subreq->retry_count++;
254 __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
255 __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
256 netfs_stat(&netfs_n_wh_retry_write_subreq);
257 netfs_do_issue_write(stream, subreq);
258 }
259
netfs_issue_write(struct netfs_io_request * wreq,struct netfs_io_stream * stream)260 void netfs_issue_write(struct netfs_io_request *wreq,
261 struct netfs_io_stream *stream)
262 {
263 struct netfs_io_subrequest *subreq = stream->construct;
264
265 if (!subreq)
266 return;
267 stream->construct = NULL;
268 subreq->io_iter.count = subreq->len;
269 netfs_do_issue_write(stream, subreq);
270 }
271
272 /*
273 * Add data to the write subrequest, dispatching each as we fill it up or if it
274 * is discontiguous with the previous. We only fill one part at a time so that
275 * we can avoid overrunning the credits obtained (cifs) and try to parallelise
276 * content-crypto preparation with network writes.
277 */
netfs_advance_write(struct netfs_io_request * wreq,struct netfs_io_stream * stream,loff_t start,size_t len,bool to_eof)278 size_t netfs_advance_write(struct netfs_io_request *wreq,
279 struct netfs_io_stream *stream,
280 loff_t start, size_t len, bool to_eof)
281 {
282 struct netfs_io_subrequest *subreq = stream->construct;
283 size_t part;
284
285 if (!stream->avail) {
286 _leave("no write");
287 return len;
288 }
289
290 _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
291
292 if (subreq && start != subreq->start + subreq->len) {
293 netfs_issue_write(wreq, stream);
294 subreq = NULL;
295 }
296
297 if (!stream->construct)
298 netfs_prepare_write(wreq, stream, start);
299 subreq = stream->construct;
300
301 part = umin(stream->sreq_max_len - subreq->len, len);
302 _debug("part %zx/%zx %zx/%zx", subreq->len, stream->sreq_max_len, part, len);
303 subreq->len += part;
304 subreq->nr_segs++;
305 stream->submit_extendable_to -= part;
306
307 if (subreq->len >= stream->sreq_max_len ||
308 subreq->nr_segs >= stream->sreq_max_segs ||
309 to_eof) {
310 netfs_issue_write(wreq, stream);
311 subreq = NULL;
312 }
313
314 return part;
315 }
316
317 /*
318 * Write some of a pending folio data back to the server.
319 */
netfs_write_folio(struct netfs_io_request * wreq,struct writeback_control * wbc,struct folio * folio)320 static int netfs_write_folio(struct netfs_io_request *wreq,
321 struct writeback_control *wbc,
322 struct folio *folio)
323 {
324 struct netfs_io_stream *upload = &wreq->io_streams[0];
325 struct netfs_io_stream *cache = &wreq->io_streams[1];
326 struct netfs_io_stream *stream;
327 struct netfs_group *fgroup; /* TODO: Use this with ceph */
328 struct netfs_folio *finfo;
329 size_t iter_off = 0;
330 size_t fsize = folio_size(folio), flen = fsize, foff = 0;
331 loff_t fpos = folio_pos(folio), i_size;
332 bool to_eof = false, streamw = false;
333 bool debug = false;
334
335 _enter("");
336
337 if (rolling_buffer_make_space(&wreq->buffer) < 0)
338 return -ENOMEM;
339
340 /* netfs_perform_write() may shift i_size around the page or from out
341 * of the page to beyond it, but cannot move i_size into or through the
342 * page since we have it locked.
343 */
344 i_size = i_size_read(wreq->inode);
345
346 if (fpos >= i_size) {
347 /* mmap beyond eof. */
348 _debug("beyond eof");
349 folio_start_writeback(folio);
350 folio_unlock(folio);
351 wreq->nr_group_rel += netfs_folio_written_back(folio);
352 netfs_put_group_many(wreq->group, wreq->nr_group_rel);
353 wreq->nr_group_rel = 0;
354 return 0;
355 }
356
357 if (fpos + fsize > wreq->i_size)
358 wreq->i_size = i_size;
359
360 fgroup = netfs_folio_group(folio);
361 finfo = netfs_folio_info(folio);
362 if (finfo) {
363 foff = finfo->dirty_offset;
364 flen = foff + finfo->dirty_len;
365 streamw = true;
366 }
367
368 if (wreq->origin == NETFS_WRITETHROUGH) {
369 to_eof = false;
370 if (flen > i_size - fpos)
371 flen = i_size - fpos;
372 } else if (flen > i_size - fpos) {
373 flen = i_size - fpos;
374 if (!streamw)
375 folio_zero_segment(folio, flen, fsize);
376 to_eof = true;
377 } else if (flen == i_size - fpos) {
378 to_eof = true;
379 }
380 flen -= foff;
381
382 _debug("folio %zx %zx %zx", foff, flen, fsize);
383
384 /* Deal with discontinuities in the stream of dirty pages. These can
385 * arise from a number of sources:
386 *
387 * (1) Intervening non-dirty pages from random-access writes, multiple
388 * flushers writing back different parts simultaneously and manual
389 * syncing.
390 *
391 * (2) Partially-written pages from write-streaming.
392 *
393 * (3) Pages that belong to a different write-back group (eg. Ceph
394 * snapshots).
395 *
396 * (4) Actually-clean pages that were marked for write to the cache
397 * when they were read. Note that these appear as a special
398 * write-back group.
399 */
400 if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
401 netfs_issue_write(wreq, upload);
402 } else if (fgroup != wreq->group) {
403 /* We can't write this page to the server yet. */
404 kdebug("wrong group");
405 folio_redirty_for_writepage(wbc, folio);
406 folio_unlock(folio);
407 netfs_issue_write(wreq, upload);
408 netfs_issue_write(wreq, cache);
409 return 0;
410 }
411
412 if (foff > 0)
413 netfs_issue_write(wreq, upload);
414 if (streamw)
415 netfs_issue_write(wreq, cache);
416
417 /* Flip the page to the writeback state and unlock. If we're called
418 * from write-through, then the page has already been put into the wb
419 * state.
420 */
421 if (wreq->origin == NETFS_WRITEBACK)
422 folio_start_writeback(folio);
423 folio_unlock(folio);
424
425 if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
426 if (!cache->avail) {
427 trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
428 netfs_issue_write(wreq, upload);
429 netfs_folio_written_back(folio);
430 return 0;
431 }
432 trace_netfs_folio(folio, netfs_folio_trace_store_copy);
433 } else if (!upload->avail && !cache->avail) {
434 trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
435 netfs_folio_written_back(folio);
436 return 0;
437 } else if (!upload->construct) {
438 trace_netfs_folio(folio, netfs_folio_trace_store);
439 } else {
440 trace_netfs_folio(folio, netfs_folio_trace_store_plus);
441 }
442
443 /* Attach the folio to the rolling buffer. */
444 rolling_buffer_append(&wreq->buffer, folio, 0);
445
446 /* Move the submission point forward to allow for write-streaming data
447 * not starting at the front of the page. We don't do write-streaming
448 * with the cache as the cache requires DIO alignment.
449 *
450 * Also skip uploading for data that's been read and just needs copying
451 * to the cache.
452 */
453 for (int s = 0; s < NR_IO_STREAMS; s++) {
454 stream = &wreq->io_streams[s];
455 stream->submit_off = foff;
456 stream->submit_len = flen;
457 if (!stream->avail ||
458 (stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
459 (stream->source == NETFS_UPLOAD_TO_SERVER &&
460 fgroup == NETFS_FOLIO_COPY_TO_CACHE)) {
461 stream->submit_off = UINT_MAX;
462 stream->submit_len = 0;
463 }
464 }
465
466 /* Attach the folio to one or more subrequests. For a big folio, we
467 * could end up with thousands of subrequests if the wsize is small -
468 * but we might need to wait during the creation of subrequests for
469 * network resources (eg. SMB credits).
470 */
471 for (;;) {
472 ssize_t part;
473 size_t lowest_off = ULONG_MAX;
474 int choose_s = -1;
475
476 /* Always add to the lowest-submitted stream first. */
477 for (int s = 0; s < NR_IO_STREAMS; s++) {
478 stream = &wreq->io_streams[s];
479 if (stream->submit_len > 0 &&
480 stream->submit_off < lowest_off) {
481 lowest_off = stream->submit_off;
482 choose_s = s;
483 }
484 }
485
486 if (choose_s < 0)
487 break;
488 stream = &wreq->io_streams[choose_s];
489
490 /* Advance the iterator(s). */
491 if (stream->submit_off > iter_off) {
492 rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
493 iter_off = stream->submit_off;
494 }
495
496 atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
497 stream->submit_extendable_to = fsize - stream->submit_off;
498 part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
499 stream->submit_len, to_eof);
500 stream->submit_off += part;
501 if (part > stream->submit_len)
502 stream->submit_len = 0;
503 else
504 stream->submit_len -= part;
505 if (part > 0)
506 debug = true;
507 }
508
509 if (fsize > iter_off)
510 rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
511 atomic64_set(&wreq->issued_to, fpos + fsize);
512
513 if (!debug)
514 kdebug("R=%x: No submit", wreq->debug_id);
515
516 if (foff + flen < fsize)
517 for (int s = 0; s < NR_IO_STREAMS; s++)
518 netfs_issue_write(wreq, &wreq->io_streams[s]);
519
520 _leave(" = 0");
521 return 0;
522 }
523
524 /*
525 * End the issuing of writes, letting the collector know we're done.
526 */
netfs_end_issue_write(struct netfs_io_request * wreq)527 static void netfs_end_issue_write(struct netfs_io_request *wreq)
528 {
529 bool needs_poke = true;
530
531 smp_wmb(); /* Write subreq lists before ALL_QUEUED. */
532 set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
533
534 for (int s = 0; s < NR_IO_STREAMS; s++) {
535 struct netfs_io_stream *stream = &wreq->io_streams[s];
536
537 if (!stream->active)
538 continue;
539 if (!list_empty(&stream->subrequests))
540 needs_poke = false;
541 netfs_issue_write(wreq, stream);
542 }
543
544 if (needs_poke)
545 netfs_wake_write_collector(wreq, false);
546 }
547
548 /*
549 * Write some of the pending data back to the server
550 */
netfs_writepages(struct address_space * mapping,struct writeback_control * wbc)551 int netfs_writepages(struct address_space *mapping,
552 struct writeback_control *wbc)
553 {
554 struct netfs_inode *ictx = netfs_inode(mapping->host);
555 struct netfs_io_request *wreq = NULL;
556 struct folio *folio;
557 int error = 0;
558
559 if (!mutex_trylock(&ictx->wb_lock)) {
560 if (wbc->sync_mode == WB_SYNC_NONE) {
561 netfs_stat(&netfs_n_wb_lock_skip);
562 return 0;
563 }
564 netfs_stat(&netfs_n_wb_lock_wait);
565 mutex_lock(&ictx->wb_lock);
566 }
567
568 /* Need the first folio to be able to set up the op. */
569 folio = writeback_iter(mapping, wbc, NULL, &error);
570 if (!folio)
571 goto out;
572
573 wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
574 if (IS_ERR(wreq)) {
575 error = PTR_ERR(wreq);
576 goto couldnt_start;
577 }
578
579 trace_netfs_write(wreq, netfs_write_trace_writeback);
580 netfs_stat(&netfs_n_wh_writepages);
581
582 do {
583 _debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));
584
585 /* It appears we don't have to handle cyclic writeback wrapping. */
586 WARN_ON_ONCE(wreq && folio_pos(folio) < atomic64_read(&wreq->issued_to));
587
588 if (netfs_folio_group(folio) != NETFS_FOLIO_COPY_TO_CACHE &&
589 unlikely(!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))) {
590 set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
591 wreq->netfs_ops->begin_writeback(wreq);
592 }
593
594 error = netfs_write_folio(wreq, wbc, folio);
595 if (error < 0)
596 break;
597 } while ((folio = writeback_iter(mapping, wbc, folio, &error)));
598
599 netfs_end_issue_write(wreq);
600
601 mutex_unlock(&ictx->wb_lock);
602
603 netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
604 _leave(" = %d", error);
605 return error;
606
607 couldnt_start:
608 netfs_kill_dirty_pages(mapping, wbc, folio);
609 out:
610 mutex_unlock(&ictx->wb_lock);
611 _leave(" = %d", error);
612 return error;
613 }
614 EXPORT_SYMBOL(netfs_writepages);
615
616 /*
617 * Begin a write operation for writing through the pagecache.
618 */
netfs_begin_writethrough(struct kiocb * iocb,size_t len)619 struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
620 {
621 struct netfs_io_request *wreq = NULL;
622 struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp));
623
624 mutex_lock(&ictx->wb_lock);
625
626 wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp,
627 iocb->ki_pos, NETFS_WRITETHROUGH);
628 if (IS_ERR(wreq)) {
629 mutex_unlock(&ictx->wb_lock);
630 return wreq;
631 }
632
633 wreq->io_streams[0].avail = true;
634 trace_netfs_write(wreq, netfs_write_trace_writethrough);
635 return wreq;
636 }
637
638 /*
639 * Advance the state of the write operation used when writing through the
640 * pagecache. Data has been copied into the pagecache that we need to append
641 * to the request. If we've added more than wsize then we need to create a new
642 * subrequest.
643 */
netfs_advance_writethrough(struct netfs_io_request * wreq,struct writeback_control * wbc,struct folio * folio,size_t copied,bool to_page_end,struct folio ** writethrough_cache)644 int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
645 struct folio *folio, size_t copied, bool to_page_end,
646 struct folio **writethrough_cache)
647 {
648 _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
649 wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end);
650
651 if (!*writethrough_cache) {
652 if (folio_test_dirty(folio))
653 /* Sigh. mmap. */
654 folio_clear_dirty_for_io(folio);
655
656 /* We can make multiple writes to the folio... */
657 folio_start_writeback(folio);
658 if (wreq->len == 0)
659 trace_netfs_folio(folio, netfs_folio_trace_wthru);
660 else
661 trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
662 *writethrough_cache = folio;
663 }
664
665 wreq->len += copied;
666 if (!to_page_end)
667 return 0;
668
669 *writethrough_cache = NULL;
670 return netfs_write_folio(wreq, wbc, folio);
671 }
672
673 /*
674 * End a write operation used when writing through the pagecache.
675 */
netfs_end_writethrough(struct netfs_io_request * wreq,struct writeback_control * wbc,struct folio * writethrough_cache)676 int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
677 struct folio *writethrough_cache)
678 {
679 struct netfs_inode *ictx = netfs_inode(wreq->inode);
680 int ret;
681
682 _enter("R=%x", wreq->debug_id);
683
684 if (writethrough_cache)
685 netfs_write_folio(wreq, wbc, writethrough_cache);
686
687 netfs_end_issue_write(wreq);
688
689 mutex_unlock(&ictx->wb_lock);
690
691 if (wreq->iocb) {
692 ret = -EIOCBQUEUED;
693 } else {
694 wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
695 ret = wreq->error;
696 }
697 netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
698 return ret;
699 }
700
701 /*
702 * Write data to the server without going through the pagecache and without
703 * writing it to the local cache.
704 */
netfs_unbuffered_write(struct netfs_io_request * wreq,bool may_wait,size_t len)705 int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len)
706 {
707 struct netfs_io_stream *upload = &wreq->io_streams[0];
708 ssize_t part;
709 loff_t start = wreq->start;
710 int error = 0;
711
712 _enter("%zx", len);
713
714 if (wreq->origin == NETFS_DIO_WRITE)
715 inode_dio_begin(wreq->inode);
716
717 while (len) {
718 // TODO: Prepare content encryption
719
720 _debug("unbuffered %zx", len);
721 part = netfs_advance_write(wreq, upload, start, len, false);
722 start += part;
723 len -= part;
724 rolling_buffer_advance(&wreq->buffer, part);
725 if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
726 trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause);
727 wait_event(wreq->waitq, !test_bit(NETFS_RREQ_PAUSE, &wreq->flags));
728 }
729 if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
730 break;
731 }
732
733 netfs_end_issue_write(wreq);
734 _leave(" = %d", error);
735 return error;
736 }
737
738 /*
739 * Write some of a pending folio data back to the server and/or the cache.
740 */
netfs_write_folio_single(struct netfs_io_request * wreq,struct folio * folio)741 static int netfs_write_folio_single(struct netfs_io_request *wreq,
742 struct folio *folio)
743 {
744 struct netfs_io_stream *upload = &wreq->io_streams[0];
745 struct netfs_io_stream *cache = &wreq->io_streams[1];
746 struct netfs_io_stream *stream;
747 size_t iter_off = 0;
748 size_t fsize = folio_size(folio), flen;
749 loff_t fpos = folio_pos(folio);
750 bool to_eof = false;
751 bool no_debug = false;
752
753 _enter("");
754
755 flen = folio_size(folio);
756 if (flen > wreq->i_size - fpos) {
757 flen = wreq->i_size - fpos;
758 folio_zero_segment(folio, flen, fsize);
759 to_eof = true;
760 } else if (flen == wreq->i_size - fpos) {
761 to_eof = true;
762 }
763
764 _debug("folio %zx/%zx", flen, fsize);
765
766 if (!upload->avail && !cache->avail) {
767 trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
768 return 0;
769 }
770
771 if (!upload->construct)
772 trace_netfs_folio(folio, netfs_folio_trace_store);
773 else
774 trace_netfs_folio(folio, netfs_folio_trace_store_plus);
775
776 /* Attach the folio to the rolling buffer. */
777 folio_get(folio);
778 rolling_buffer_append(&wreq->buffer, folio, NETFS_ROLLBUF_PUT_MARK);
779
780 /* Move the submission point forward to allow for write-streaming data
781 * not starting at the front of the page. We don't do write-streaming
782 * with the cache as the cache requires DIO alignment.
783 *
784 * Also skip uploading for data that's been read and just needs copying
785 * to the cache.
786 */
787 for (int s = 0; s < NR_IO_STREAMS; s++) {
788 stream = &wreq->io_streams[s];
789 stream->submit_off = 0;
790 stream->submit_len = flen;
791 if (!stream->avail) {
792 stream->submit_off = UINT_MAX;
793 stream->submit_len = 0;
794 }
795 }
796
797 /* Attach the folio to one or more subrequests. For a big folio, we
798 * could end up with thousands of subrequests if the wsize is small -
799 * but we might need to wait during the creation of subrequests for
800 * network resources (eg. SMB credits).
801 */
802 for (;;) {
803 ssize_t part;
804 size_t lowest_off = ULONG_MAX;
805 int choose_s = -1;
806
807 /* Always add to the lowest-submitted stream first. */
808 for (int s = 0; s < NR_IO_STREAMS; s++) {
809 stream = &wreq->io_streams[s];
810 if (stream->submit_len > 0 &&
811 stream->submit_off < lowest_off) {
812 lowest_off = stream->submit_off;
813 choose_s = s;
814 }
815 }
816
817 if (choose_s < 0)
818 break;
819 stream = &wreq->io_streams[choose_s];
820
821 /* Advance the iterator(s). */
822 if (stream->submit_off > iter_off) {
823 rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
824 iter_off = stream->submit_off;
825 }
826
827 atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
828 stream->submit_extendable_to = fsize - stream->submit_off;
829 part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
830 stream->submit_len, to_eof);
831 stream->submit_off += part;
832 if (part > stream->submit_len)
833 stream->submit_len = 0;
834 else
835 stream->submit_len -= part;
836 if (part > 0)
837 no_debug = true;
838 }
839
840 wreq->buffer.iter.iov_offset = 0;
841 if (fsize > iter_off)
842 rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
843 atomic64_set(&wreq->issued_to, fpos + fsize);
844
845 if (!no_debug)
846 kdebug("R=%x: No submit", wreq->debug_id);
847 _leave(" = 0");
848 return 0;
849 }
850
851 /**
852 * netfs_writeback_single - Write back a monolithic payload
853 * @mapping: The mapping to write from
854 * @wbc: Hints from the VM
855 * @iter: Data to write, must be ITER_FOLIOQ.
856 *
857 * Write a monolithic, non-pagecache object back to the server and/or
858 * the cache.
859 */
netfs_writeback_single(struct address_space * mapping,struct writeback_control * wbc,struct iov_iter * iter)860 int netfs_writeback_single(struct address_space *mapping,
861 struct writeback_control *wbc,
862 struct iov_iter *iter)
863 {
864 struct netfs_io_request *wreq;
865 struct netfs_inode *ictx = netfs_inode(mapping->host);
866 struct folio_queue *fq;
867 size_t size = iov_iter_count(iter);
868 int ret;
869
870 if (WARN_ON_ONCE(!iov_iter_is_folioq(iter)))
871 return -EIO;
872
873 if (!mutex_trylock(&ictx->wb_lock)) {
874 if (wbc->sync_mode == WB_SYNC_NONE) {
875 netfs_stat(&netfs_n_wb_lock_skip);
876 return 0;
877 }
878 netfs_stat(&netfs_n_wb_lock_wait);
879 mutex_lock(&ictx->wb_lock);
880 }
881
882 wreq = netfs_create_write_req(mapping, NULL, 0, NETFS_WRITEBACK_SINGLE);
883 if (IS_ERR(wreq)) {
884 ret = PTR_ERR(wreq);
885 goto couldnt_start;
886 }
887
888 trace_netfs_write(wreq, netfs_write_trace_writeback);
889 netfs_stat(&netfs_n_wh_writepages);
890
891 if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
892 wreq->netfs_ops->begin_writeback(wreq);
893
894 for (fq = (struct folio_queue *)iter->folioq; fq; fq = fq->next) {
895 for (int slot = 0; slot < folioq_count(fq); slot++) {
896 struct folio *folio = folioq_folio(fq, slot);
897 size_t part = umin(folioq_folio_size(fq, slot), size);
898
899 _debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));
900
901 ret = netfs_write_folio_single(wreq, folio);
902 if (ret < 0)
903 goto stop;
904 size -= part;
905 if (size <= 0)
906 goto stop;
907 }
908 }
909
910 stop:
911 for (int s = 0; s < NR_IO_STREAMS; s++)
912 netfs_issue_write(wreq, &wreq->io_streams[s]);
913 smp_wmb(); /* Write lists before ALL_QUEUED. */
914 set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
915
916 mutex_unlock(&ictx->wb_lock);
917
918 netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
919 _leave(" = %d", ret);
920 return ret;
921
922 couldnt_start:
923 mutex_unlock(&ictx->wb_lock);
924 _leave(" = %d", ret);
925 return ret;
926 }
927 EXPORT_SYMBOL(netfs_writeback_single);
928