xref: /linux/fs/netfs/write_issue.c (revision 0b3bb205808195159be633a8cefb602670e856fb)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Network filesystem high-level (buffered) writeback.
3  *
4  * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  *
7  *
8  * To support network filesystems with local caching, we manage a situation
9  * that can be envisioned like the following:
10  *
11  *               +---+---+-----+-----+---+----------+
12  *    Folios:    |   |   |     |     |   |          |
13  *               +---+---+-----+-----+---+----------+
14  *
15  *                 +------+------+     +----+----+
16  *    Upload:      |      |      |.....|    |    |
17  *  (Stream 0)     +------+------+     +----+----+
18  *
19  *               +------+------+------+------+------+
20  *    Cache:     |      |      |      |      |      |
21  *  (Stream 1)   +------+------+------+------+------+
22  *
23  * Where we have a sequence of folios of varying sizes that we need to overlay
24  * with multiple parallel streams of I/O requests, where the I/O requests in a
25  * stream may also be of various sizes (in cifs, for example, the sizes are
26  * negotiated with the server; in something like ceph, they may represent the
27  * sizes of storage objects).
28  *
29  * The sequence in each stream may contain gaps and noncontiguous subrequests
30  * may be glued together into single vectored write RPCs.
31  */
32 
33 #include <linux/export.h>
34 #include <linux/fs.h>
35 #include <linux/mm.h>
36 #include <linux/pagemap.h>
37 #include "internal.h"
38 
39 /*
40  * Kill all dirty folios in the event of an unrecoverable error, starting with
41  * a locked folio we've already obtained from writeback_iter().
42  */
netfs_kill_dirty_pages(struct address_space * mapping,struct writeback_control * wbc,struct folio * folio)43 static void netfs_kill_dirty_pages(struct address_space *mapping,
44 				   struct writeback_control *wbc,
45 				   struct folio *folio)
46 {
47 	int error = 0;
48 
49 	do {
50 		enum netfs_folio_trace why = netfs_folio_trace_kill;
51 		struct netfs_group *group = NULL;
52 		struct netfs_folio *finfo = NULL;
53 		void *priv;
54 
55 		priv = folio_detach_private(folio);
56 		if (priv) {
57 			finfo = __netfs_folio_info(priv);
58 			if (finfo) {
59 				/* Kill folio from streaming write. */
60 				group = finfo->netfs_group;
61 				why = netfs_folio_trace_kill_s;
62 			} else {
63 				group = priv;
64 				if (group == NETFS_FOLIO_COPY_TO_CACHE) {
65 					/* Kill copy-to-cache folio */
66 					why = netfs_folio_trace_kill_cc;
67 					group = NULL;
68 				} else {
69 					/* Kill folio with group */
70 					why = netfs_folio_trace_kill_g;
71 				}
72 			}
73 		}
74 
75 		trace_netfs_folio(folio, why);
76 
77 		folio_start_writeback(folio);
78 		folio_unlock(folio);
79 		folio_end_writeback(folio);
80 
81 		netfs_put_group(group);
82 		kfree(finfo);
83 
84 	} while ((folio = writeback_iter(mapping, wbc, folio, &error)));
85 }
86 
87 /*
88  * Create a write request and set it up appropriately for the origin type.
89  */
netfs_create_write_req(struct address_space * mapping,struct file * file,loff_t start,enum netfs_io_origin origin)90 struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
91 						struct file *file,
92 						loff_t start,
93 						enum netfs_io_origin origin)
94 {
95 	struct netfs_io_request *wreq;
96 	struct netfs_inode *ictx;
97 	bool is_cacheable = (origin == NETFS_WRITEBACK ||
98 			     origin == NETFS_WRITEBACK_SINGLE ||
99 			     origin == NETFS_WRITETHROUGH ||
100 			     origin == NETFS_PGPRIV2_COPY_TO_CACHE);
101 
102 	wreq = netfs_alloc_request(mapping, file, start, 0, origin);
103 	if (IS_ERR(wreq))
104 		return wreq;
105 
106 	_enter("R=%x", wreq->debug_id);
107 
108 	ictx = netfs_inode(wreq->inode);
109 	if (is_cacheable && netfs_is_cache_enabled(ictx))
110 		fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
111 	if (rolling_buffer_init(&wreq->buffer, wreq->debug_id, ITER_SOURCE) < 0)
112 		goto nomem;
113 
114 	wreq->cleaned_to = wreq->start;
115 
116 	wreq->io_streams[0].stream_nr		= 0;
117 	wreq->io_streams[0].source		= NETFS_UPLOAD_TO_SERVER;
118 	wreq->io_streams[0].prepare_write	= ictx->ops->prepare_write;
119 	wreq->io_streams[0].issue_write		= ictx->ops->issue_write;
120 	wreq->io_streams[0].collected_to	= start;
121 	wreq->io_streams[0].transferred		= 0;
122 
123 	wreq->io_streams[1].stream_nr		= 1;
124 	wreq->io_streams[1].source		= NETFS_WRITE_TO_CACHE;
125 	wreq->io_streams[1].collected_to	= start;
126 	wreq->io_streams[1].transferred		= 0;
127 	if (fscache_resources_valid(&wreq->cache_resources)) {
128 		wreq->io_streams[1].avail	= true;
129 		wreq->io_streams[1].active	= true;
130 		wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq;
131 		wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write;
132 	}
133 
134 	return wreq;
135 nomem:
136 	netfs_put_failed_request(wreq);
137 	return ERR_PTR(-ENOMEM);
138 }
139 
140 /**
141  * netfs_prepare_write_failed - Note write preparation failed
142  * @subreq: The subrequest to mark
143  *
144  * Mark a subrequest to note that preparation for write failed.
145  */
netfs_prepare_write_failed(struct netfs_io_subrequest * subreq)146 void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq)
147 {
148 	__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
149 	trace_netfs_sreq(subreq, netfs_sreq_trace_prep_failed);
150 }
151 EXPORT_SYMBOL(netfs_prepare_write_failed);
152 
153 /*
154  * Prepare a write subrequest.  We need to allocate a new subrequest
155  * if we don't have one.
156  */
netfs_prepare_write(struct netfs_io_request * wreq,struct netfs_io_stream * stream,loff_t start)157 void netfs_prepare_write(struct netfs_io_request *wreq,
158 			 struct netfs_io_stream *stream,
159 			 loff_t start)
160 {
161 	struct netfs_io_subrequest *subreq;
162 	struct iov_iter *wreq_iter = &wreq->buffer.iter;
163 
164 	/* Make sure we don't point the iterator at a used-up folio_queue
165 	 * struct being used as a placeholder to prevent the queue from
166 	 * collapsing.  In such a case, extend the queue.
167 	 */
168 	if (iov_iter_is_folioq(wreq_iter) &&
169 	    wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq))
170 		rolling_buffer_make_space(&wreq->buffer);
171 
172 	subreq = netfs_alloc_subrequest(wreq);
173 	subreq->source		= stream->source;
174 	subreq->start		= start;
175 	subreq->stream_nr	= stream->stream_nr;
176 	subreq->io_iter		= *wreq_iter;
177 
178 	_enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
179 
180 	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
181 
182 	stream->sreq_max_len	= UINT_MAX;
183 	stream->sreq_max_segs	= INT_MAX;
184 	switch (stream->source) {
185 	case NETFS_UPLOAD_TO_SERVER:
186 		netfs_stat(&netfs_n_wh_upload);
187 		stream->sreq_max_len = wreq->wsize;
188 		break;
189 	case NETFS_WRITE_TO_CACHE:
190 		netfs_stat(&netfs_n_wh_write);
191 		break;
192 	default:
193 		WARN_ON_ONCE(1);
194 		break;
195 	}
196 
197 	if (stream->prepare_write)
198 		stream->prepare_write(subreq);
199 
200 	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
201 
202 	/* We add to the end of the list whilst the collector may be walking
203 	 * the list.  The collector only goes nextwards and uses the lock to
204 	 * remove entries off of the front.
205 	 */
206 	spin_lock(&wreq->lock);
207 	list_add_tail(&subreq->rreq_link, &stream->subrequests);
208 	if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
209 		stream->front = subreq;
210 		if (!stream->active) {
211 			stream->collected_to = stream->front->start;
212 			/* Write list pointers before active flag */
213 			smp_store_release(&stream->active, true);
214 		}
215 	}
216 
217 	spin_unlock(&wreq->lock);
218 
219 	stream->construct = subreq;
220 }
221 
222 /*
223  * Set the I/O iterator for the filesystem/cache to use and dispatch the I/O
224  * operation.  The operation may be asynchronous and should call
225  * netfs_write_subrequest_terminated() when complete.
226  */
netfs_do_issue_write(struct netfs_io_stream * stream,struct netfs_io_subrequest * subreq)227 static void netfs_do_issue_write(struct netfs_io_stream *stream,
228 				 struct netfs_io_subrequest *subreq)
229 {
230 	struct netfs_io_request *wreq = subreq->rreq;
231 
232 	_enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
233 
234 	if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
235 		return netfs_write_subrequest_terminated(subreq, subreq->error);
236 
237 	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
238 	stream->issue_write(subreq);
239 }
240 
netfs_reissue_write(struct netfs_io_stream * stream,struct netfs_io_subrequest * subreq,struct iov_iter * source)241 void netfs_reissue_write(struct netfs_io_stream *stream,
242 			 struct netfs_io_subrequest *subreq,
243 			 struct iov_iter *source)
244 {
245 	size_t size = subreq->len - subreq->transferred;
246 
247 	// TODO: Use encrypted buffer
248 	subreq->io_iter = *source;
249 	iov_iter_advance(source, size);
250 	iov_iter_truncate(&subreq->io_iter, size);
251 
252 	subreq->retry_count++;
253 	subreq->error = 0;
254 	__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
255 	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
256 	netfs_stat(&netfs_n_wh_retry_write_subreq);
257 	netfs_do_issue_write(stream, subreq);
258 }
259 
netfs_issue_write(struct netfs_io_request * wreq,struct netfs_io_stream * stream)260 void netfs_issue_write(struct netfs_io_request *wreq,
261 		       struct netfs_io_stream *stream)
262 {
263 	struct netfs_io_subrequest *subreq = stream->construct;
264 
265 	if (!subreq)
266 		return;
267 	stream->construct = NULL;
268 	subreq->io_iter.count = subreq->len;
269 	netfs_do_issue_write(stream, subreq);
270 }
271 
272 /*
273  * Add data to the write subrequest, dispatching each as we fill it up or if it
274  * is discontiguous with the previous.  We only fill one part at a time so that
275  * we can avoid overrunning the credits obtained (cifs) and try to parallelise
276  * content-crypto preparation with network writes.
277  */
netfs_advance_write(struct netfs_io_request * wreq,struct netfs_io_stream * stream,loff_t start,size_t len,bool to_eof)278 size_t netfs_advance_write(struct netfs_io_request *wreq,
279 			   struct netfs_io_stream *stream,
280 			   loff_t start, size_t len, bool to_eof)
281 {
282 	struct netfs_io_subrequest *subreq = stream->construct;
283 	size_t part;
284 
285 	if (!stream->avail) {
286 		_leave("no write");
287 		return len;
288 	}
289 
290 	_enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
291 
292 	if (subreq && start != subreq->start + subreq->len) {
293 		netfs_issue_write(wreq, stream);
294 		subreq = NULL;
295 	}
296 
297 	if (!stream->construct)
298 		netfs_prepare_write(wreq, stream, start);
299 	subreq = stream->construct;
300 
301 	part = umin(stream->sreq_max_len - subreq->len, len);
302 	_debug("part %zx/%zx %zx/%zx", subreq->len, stream->sreq_max_len, part, len);
303 	subreq->len += part;
304 	subreq->nr_segs++;
305 	stream->submit_extendable_to -= part;
306 
307 	if (subreq->len >= stream->sreq_max_len ||
308 	    subreq->nr_segs >= stream->sreq_max_segs ||
309 	    to_eof) {
310 		netfs_issue_write(wreq, stream);
311 		subreq = NULL;
312 	}
313 
314 	return part;
315 }
316 
317 /*
318  * Write some of a pending folio data back to the server.
319  */
netfs_write_folio(struct netfs_io_request * wreq,struct writeback_control * wbc,struct folio * folio)320 static int netfs_write_folio(struct netfs_io_request *wreq,
321 			     struct writeback_control *wbc,
322 			     struct folio *folio)
323 {
324 	struct netfs_io_stream *upload = &wreq->io_streams[0];
325 	struct netfs_io_stream *cache  = &wreq->io_streams[1];
326 	struct netfs_io_stream *stream;
327 	struct netfs_group *fgroup; /* TODO: Use this with ceph */
328 	struct netfs_folio *finfo;
329 	size_t iter_off = 0;
330 	size_t fsize = folio_size(folio), flen = fsize, foff = 0;
331 	loff_t fpos = folio_pos(folio), i_size;
332 	bool to_eof = false, streamw = false;
333 	bool debug = false;
334 
335 	_enter("");
336 
337 	if (rolling_buffer_make_space(&wreq->buffer) < 0)
338 		return -ENOMEM;
339 
340 	/* netfs_perform_write() may shift i_size around the page or from out
341 	 * of the page to beyond it, but cannot move i_size into or through the
342 	 * page since we have it locked.
343 	 */
344 	i_size = i_size_read(wreq->inode);
345 
346 	if (fpos >= i_size) {
347 		/* mmap beyond eof. */
348 		_debug("beyond eof");
349 		folio_start_writeback(folio);
350 		folio_unlock(folio);
351 		wreq->nr_group_rel += netfs_folio_written_back(folio);
352 		netfs_put_group_many(wreq->group, wreq->nr_group_rel);
353 		wreq->nr_group_rel = 0;
354 		return 0;
355 	}
356 
357 	if (fpos + fsize > wreq->i_size)
358 		wreq->i_size = i_size;
359 
360 	fgroup = netfs_folio_group(folio);
361 	finfo = netfs_folio_info(folio);
362 	if (finfo) {
363 		foff = finfo->dirty_offset;
364 		flen = foff + finfo->dirty_len;
365 		streamw = true;
366 	}
367 
368 	if (wreq->origin == NETFS_WRITETHROUGH) {
369 		to_eof = false;
370 		if (flen > i_size - fpos)
371 			flen = i_size - fpos;
372 	} else if (flen > i_size - fpos) {
373 		flen = i_size - fpos;
374 		if (!streamw)
375 			folio_zero_segment(folio, flen, fsize);
376 		to_eof = true;
377 	} else if (flen == i_size - fpos) {
378 		to_eof = true;
379 	}
380 	flen -= foff;
381 
382 	_debug("folio %zx %zx %zx", foff, flen, fsize);
383 
384 	/* Deal with discontinuities in the stream of dirty pages.  These can
385 	 * arise from a number of sources:
386 	 *
387 	 * (1) Intervening non-dirty pages from random-access writes, multiple
388 	 *     flushers writing back different parts simultaneously and manual
389 	 *     syncing.
390 	 *
391 	 * (2) Partially-written pages from write-streaming.
392 	 *
393 	 * (3) Pages that belong to a different write-back group (eg.  Ceph
394 	 *     snapshots).
395 	 *
396 	 * (4) Actually-clean pages that were marked for write to the cache
397 	 *     when they were read.  Note that these appear as a special
398 	 *     write-back group.
399 	 */
400 	if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
401 		netfs_issue_write(wreq, upload);
402 	} else if (fgroup != wreq->group) {
403 		/* We can't write this page to the server yet. */
404 		kdebug("wrong group");
405 		folio_redirty_for_writepage(wbc, folio);
406 		folio_unlock(folio);
407 		netfs_issue_write(wreq, upload);
408 		netfs_issue_write(wreq, cache);
409 		return 0;
410 	}
411 
412 	if (foff > 0)
413 		netfs_issue_write(wreq, upload);
414 	if (streamw)
415 		netfs_issue_write(wreq, cache);
416 
417 	/* Flip the page to the writeback state and unlock.  If we're called
418 	 * from write-through, then the page has already been put into the wb
419 	 * state.
420 	 */
421 	if (wreq->origin == NETFS_WRITEBACK)
422 		folio_start_writeback(folio);
423 	folio_unlock(folio);
424 
425 	if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
426 		if (!cache->avail) {
427 			trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
428 			netfs_issue_write(wreq, upload);
429 			netfs_folio_written_back(folio);
430 			return 0;
431 		}
432 		trace_netfs_folio(folio, netfs_folio_trace_store_copy);
433 	} else if (!upload->avail && !cache->avail) {
434 		trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
435 		netfs_folio_written_back(folio);
436 		return 0;
437 	} else if (!upload->construct) {
438 		trace_netfs_folio(folio, netfs_folio_trace_store);
439 	} else {
440 		trace_netfs_folio(folio, netfs_folio_trace_store_plus);
441 	}
442 
443 	/* Attach the folio to the rolling buffer. */
444 	rolling_buffer_append(&wreq->buffer, folio, 0);
445 
446 	/* Move the submission point forward to allow for write-streaming data
447 	 * not starting at the front of the page.  We don't do write-streaming
448 	 * with the cache as the cache requires DIO alignment.
449 	 *
450 	 * Also skip uploading for data that's been read and just needs copying
451 	 * to the cache.
452 	 */
453 	for (int s = 0; s < NR_IO_STREAMS; s++) {
454 		stream = &wreq->io_streams[s];
455 		stream->submit_off = foff;
456 		stream->submit_len = flen;
457 		if (!stream->avail ||
458 		    (stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
459 		    (stream->source == NETFS_UPLOAD_TO_SERVER &&
460 		     fgroup == NETFS_FOLIO_COPY_TO_CACHE)) {
461 			stream->submit_off = UINT_MAX;
462 			stream->submit_len = 0;
463 		}
464 	}
465 
466 	/* Attach the folio to one or more subrequests.  For a big folio, we
467 	 * could end up with thousands of subrequests if the wsize is small -
468 	 * but we might need to wait during the creation of subrequests for
469 	 * network resources (eg. SMB credits).
470 	 */
471 	for (;;) {
472 		ssize_t part;
473 		size_t lowest_off = ULONG_MAX;
474 		int choose_s = -1;
475 
476 		/* Always add to the lowest-submitted stream first. */
477 		for (int s = 0; s < NR_IO_STREAMS; s++) {
478 			stream = &wreq->io_streams[s];
479 			if (stream->submit_len > 0 &&
480 			    stream->submit_off < lowest_off) {
481 				lowest_off = stream->submit_off;
482 				choose_s = s;
483 			}
484 		}
485 
486 		if (choose_s < 0)
487 			break;
488 		stream = &wreq->io_streams[choose_s];
489 
490 		/* Advance the iterator(s). */
491 		if (stream->submit_off > iter_off) {
492 			rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
493 			iter_off = stream->submit_off;
494 		}
495 
496 		atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
497 		stream->submit_extendable_to = fsize - stream->submit_off;
498 		part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
499 					   stream->submit_len, to_eof);
500 		stream->submit_off += part;
501 		if (part > stream->submit_len)
502 			stream->submit_len = 0;
503 		else
504 			stream->submit_len -= part;
505 		if (part > 0)
506 			debug = true;
507 	}
508 
509 	if (fsize > iter_off)
510 		rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
511 	atomic64_set(&wreq->issued_to, fpos + fsize);
512 
513 	if (!debug)
514 		kdebug("R=%x: No submit", wreq->debug_id);
515 
516 	if (foff + flen < fsize)
517 		for (int s = 0; s < NR_IO_STREAMS; s++)
518 			netfs_issue_write(wreq, &wreq->io_streams[s]);
519 
520 	_leave(" = 0");
521 	return 0;
522 }
523 
524 /*
525  * End the issuing of writes, letting the collector know we're done.
526  */
netfs_end_issue_write(struct netfs_io_request * wreq)527 static void netfs_end_issue_write(struct netfs_io_request *wreq)
528 {
529 	bool needs_poke = true;
530 
531 	smp_wmb(); /* Write subreq lists before ALL_QUEUED. */
532 	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
533 
534 	for (int s = 0; s < NR_IO_STREAMS; s++) {
535 		struct netfs_io_stream *stream = &wreq->io_streams[s];
536 
537 		if (!stream->active)
538 			continue;
539 		if (!list_empty(&stream->subrequests))
540 			needs_poke = false;
541 		netfs_issue_write(wreq, stream);
542 	}
543 
544 	if (needs_poke)
545 		netfs_wake_collector(wreq);
546 }
547 
548 /*
549  * Write some of the pending data back to the server
550  */
netfs_writepages(struct address_space * mapping,struct writeback_control * wbc)551 int netfs_writepages(struct address_space *mapping,
552 		     struct writeback_control *wbc)
553 {
554 	struct netfs_inode *ictx = netfs_inode(mapping->host);
555 	struct netfs_io_request *wreq = NULL;
556 	struct folio *folio;
557 	int error = 0;
558 
559 	if (!mutex_trylock(&ictx->wb_lock)) {
560 		if (wbc->sync_mode == WB_SYNC_NONE) {
561 			netfs_stat(&netfs_n_wb_lock_skip);
562 			return 0;
563 		}
564 		netfs_stat(&netfs_n_wb_lock_wait);
565 		mutex_lock(&ictx->wb_lock);
566 	}
567 
568 	/* Need the first folio to be able to set up the op. */
569 	folio = writeback_iter(mapping, wbc, NULL, &error);
570 	if (!folio)
571 		goto out;
572 
573 	wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
574 	if (IS_ERR(wreq)) {
575 		error = PTR_ERR(wreq);
576 		goto couldnt_start;
577 	}
578 
579 	__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
580 	trace_netfs_write(wreq, netfs_write_trace_writeback);
581 	netfs_stat(&netfs_n_wh_writepages);
582 
583 	do {
584 		_debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));
585 
586 		/* It appears we don't have to handle cyclic writeback wrapping. */
587 		WARN_ON_ONCE(wreq && folio_pos(folio) < atomic64_read(&wreq->issued_to));
588 
589 		if (netfs_folio_group(folio) != NETFS_FOLIO_COPY_TO_CACHE &&
590 		    unlikely(!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))) {
591 			set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
592 			wreq->netfs_ops->begin_writeback(wreq);
593 		}
594 
595 		error = netfs_write_folio(wreq, wbc, folio);
596 		if (error < 0)
597 			break;
598 	} while ((folio = writeback_iter(mapping, wbc, folio, &error)));
599 
600 	netfs_end_issue_write(wreq);
601 
602 	mutex_unlock(&ictx->wb_lock);
603 	netfs_wake_collector(wreq);
604 
605 	netfs_put_request(wreq, netfs_rreq_trace_put_return);
606 	_leave(" = %d", error);
607 	return error;
608 
609 couldnt_start:
610 	netfs_kill_dirty_pages(mapping, wbc, folio);
611 out:
612 	mutex_unlock(&ictx->wb_lock);
613 	_leave(" = %d", error);
614 	return error;
615 }
616 EXPORT_SYMBOL(netfs_writepages);
617 
618 /*
619  * Begin a write operation for writing through the pagecache.
620  */
netfs_begin_writethrough(struct kiocb * iocb,size_t len)621 struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
622 {
623 	struct netfs_io_request *wreq = NULL;
624 	struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp));
625 
626 	mutex_lock(&ictx->wb_lock);
627 
628 	wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp,
629 				      iocb->ki_pos, NETFS_WRITETHROUGH);
630 	if (IS_ERR(wreq)) {
631 		mutex_unlock(&ictx->wb_lock);
632 		return wreq;
633 	}
634 
635 	wreq->io_streams[0].avail = true;
636 	trace_netfs_write(wreq, netfs_write_trace_writethrough);
637 	return wreq;
638 }
639 
640 /*
641  * Advance the state of the write operation used when writing through the
642  * pagecache.  Data has been copied into the pagecache that we need to append
643  * to the request.  If we've added more than wsize then we need to create a new
644  * subrequest.
645  */
netfs_advance_writethrough(struct netfs_io_request * wreq,struct writeback_control * wbc,struct folio * folio,size_t copied,bool to_page_end,struct folio ** writethrough_cache)646 int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
647 			       struct folio *folio, size_t copied, bool to_page_end,
648 			       struct folio **writethrough_cache)
649 {
650 	_enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
651 	       wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end);
652 
653 	if (!*writethrough_cache) {
654 		if (folio_test_dirty(folio))
655 			/* Sigh.  mmap. */
656 			folio_clear_dirty_for_io(folio);
657 
658 		/* We can make multiple writes to the folio... */
659 		folio_start_writeback(folio);
660 		if (wreq->len == 0)
661 			trace_netfs_folio(folio, netfs_folio_trace_wthru);
662 		else
663 			trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
664 		*writethrough_cache = folio;
665 	}
666 
667 	wreq->len += copied;
668 	if (!to_page_end)
669 		return 0;
670 
671 	*writethrough_cache = NULL;
672 	return netfs_write_folio(wreq, wbc, folio);
673 }
674 
675 /*
676  * End a write operation used when writing through the pagecache.
677  */
netfs_end_writethrough(struct netfs_io_request * wreq,struct writeback_control * wbc,struct folio * writethrough_cache)678 ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
679 			       struct folio *writethrough_cache)
680 {
681 	struct netfs_inode *ictx = netfs_inode(wreq->inode);
682 	ssize_t ret;
683 
684 	_enter("R=%x", wreq->debug_id);
685 
686 	if (writethrough_cache)
687 		netfs_write_folio(wreq, wbc, writethrough_cache);
688 
689 	netfs_end_issue_write(wreq);
690 
691 	mutex_unlock(&ictx->wb_lock);
692 
693 	if (wreq->iocb)
694 		ret = -EIOCBQUEUED;
695 	else
696 		ret = netfs_wait_for_write(wreq);
697 	netfs_put_request(wreq, netfs_rreq_trace_put_return);
698 	return ret;
699 }
700 
701 /*
702  * Write some of a pending folio data back to the server and/or the cache.
703  */
netfs_write_folio_single(struct netfs_io_request * wreq,struct folio * folio)704 static int netfs_write_folio_single(struct netfs_io_request *wreq,
705 				    struct folio *folio)
706 {
707 	struct netfs_io_stream *upload = &wreq->io_streams[0];
708 	struct netfs_io_stream *cache  = &wreq->io_streams[1];
709 	struct netfs_io_stream *stream;
710 	size_t iter_off = 0;
711 	size_t fsize = folio_size(folio), flen;
712 	loff_t fpos = folio_pos(folio);
713 	bool to_eof = false;
714 	bool no_debug = false;
715 
716 	_enter("");
717 
718 	flen = folio_size(folio);
719 	if (flen > wreq->i_size - fpos) {
720 		flen = wreq->i_size - fpos;
721 		folio_zero_segment(folio, flen, fsize);
722 		to_eof = true;
723 	} else if (flen == wreq->i_size - fpos) {
724 		to_eof = true;
725 	}
726 
727 	_debug("folio %zx/%zx", flen, fsize);
728 
729 	if (!upload->avail && !cache->avail) {
730 		trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
731 		return 0;
732 	}
733 
734 	if (!upload->construct)
735 		trace_netfs_folio(folio, netfs_folio_trace_store);
736 	else
737 		trace_netfs_folio(folio, netfs_folio_trace_store_plus);
738 
739 	/* Attach the folio to the rolling buffer. */
740 	folio_get(folio);
741 	rolling_buffer_append(&wreq->buffer, folio, NETFS_ROLLBUF_PUT_MARK);
742 
743 	/* Move the submission point forward to allow for write-streaming data
744 	 * not starting at the front of the page.  We don't do write-streaming
745 	 * with the cache as the cache requires DIO alignment.
746 	 *
747 	 * Also skip uploading for data that's been read and just needs copying
748 	 * to the cache.
749 	 */
750 	for (int s = 0; s < NR_IO_STREAMS; s++) {
751 		stream = &wreq->io_streams[s];
752 		stream->submit_off = 0;
753 		stream->submit_len = flen;
754 		if (!stream->avail) {
755 			stream->submit_off = UINT_MAX;
756 			stream->submit_len = 0;
757 		}
758 	}
759 
760 	/* Attach the folio to one or more subrequests.  For a big folio, we
761 	 * could end up with thousands of subrequests if the wsize is small -
762 	 * but we might need to wait during the creation of subrequests for
763 	 * network resources (eg. SMB credits).
764 	 */
765 	for (;;) {
766 		ssize_t part;
767 		size_t lowest_off = ULONG_MAX;
768 		int choose_s = -1;
769 
770 		/* Always add to the lowest-submitted stream first. */
771 		for (int s = 0; s < NR_IO_STREAMS; s++) {
772 			stream = &wreq->io_streams[s];
773 			if (stream->submit_len > 0 &&
774 			    stream->submit_off < lowest_off) {
775 				lowest_off = stream->submit_off;
776 				choose_s = s;
777 			}
778 		}
779 
780 		if (choose_s < 0)
781 			break;
782 		stream = &wreq->io_streams[choose_s];
783 
784 		/* Advance the iterator(s). */
785 		if (stream->submit_off > iter_off) {
786 			rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
787 			iter_off = stream->submit_off;
788 		}
789 
790 		atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
791 		stream->submit_extendable_to = fsize - stream->submit_off;
792 		part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
793 					   stream->submit_len, to_eof);
794 		stream->submit_off += part;
795 		if (part > stream->submit_len)
796 			stream->submit_len = 0;
797 		else
798 			stream->submit_len -= part;
799 		if (part > 0)
800 			no_debug = true;
801 	}
802 
803 	wreq->buffer.iter.iov_offset = 0;
804 	if (fsize > iter_off)
805 		rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
806 	atomic64_set(&wreq->issued_to, fpos + fsize);
807 
808 	if (!no_debug)
809 		kdebug("R=%x: No submit", wreq->debug_id);
810 	_leave(" = 0");
811 	return 0;
812 }
813 
814 /**
815  * netfs_writeback_single - Write back a monolithic payload
816  * @mapping: The mapping to write from
817  * @wbc: Hints from the VM
818  * @iter: Data to write, must be ITER_FOLIOQ.
819  *
820  * Write a monolithic, non-pagecache object back to the server and/or
821  * the cache.
822  */
netfs_writeback_single(struct address_space * mapping,struct writeback_control * wbc,struct iov_iter * iter)823 int netfs_writeback_single(struct address_space *mapping,
824 			   struct writeback_control *wbc,
825 			   struct iov_iter *iter)
826 {
827 	struct netfs_io_request *wreq;
828 	struct netfs_inode *ictx = netfs_inode(mapping->host);
829 	struct folio_queue *fq;
830 	size_t size = iov_iter_count(iter);
831 	int ret;
832 
833 	if (WARN_ON_ONCE(!iov_iter_is_folioq(iter)))
834 		return -EIO;
835 
836 	if (!mutex_trylock(&ictx->wb_lock)) {
837 		if (wbc->sync_mode == WB_SYNC_NONE) {
838 			netfs_stat(&netfs_n_wb_lock_skip);
839 			return 0;
840 		}
841 		netfs_stat(&netfs_n_wb_lock_wait);
842 		mutex_lock(&ictx->wb_lock);
843 	}
844 
845 	wreq = netfs_create_write_req(mapping, NULL, 0, NETFS_WRITEBACK_SINGLE);
846 	if (IS_ERR(wreq)) {
847 		ret = PTR_ERR(wreq);
848 		goto couldnt_start;
849 	}
850 
851 	__set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
852 	trace_netfs_write(wreq, netfs_write_trace_writeback_single);
853 	netfs_stat(&netfs_n_wh_writepages);
854 
855 	if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
856 		wreq->netfs_ops->begin_writeback(wreq);
857 
858 	for (fq = (struct folio_queue *)iter->folioq; fq; fq = fq->next) {
859 		for (int slot = 0; slot < folioq_count(fq); slot++) {
860 			struct folio *folio = folioq_folio(fq, slot);
861 			size_t part = umin(folioq_folio_size(fq, slot), size);
862 
863 			_debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));
864 
865 			ret = netfs_write_folio_single(wreq, folio);
866 			if (ret < 0)
867 				goto stop;
868 			size -= part;
869 			if (size <= 0)
870 				goto stop;
871 		}
872 	}
873 
874 stop:
875 	for (int s = 0; s < NR_IO_STREAMS; s++)
876 		netfs_issue_write(wreq, &wreq->io_streams[s]);
877 	smp_wmb(); /* Write lists before ALL_QUEUED. */
878 	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
879 
880 	mutex_unlock(&ictx->wb_lock);
881 	netfs_wake_collector(wreq);
882 
883 	netfs_put_request(wreq, netfs_rreq_trace_put_return);
884 	_leave(" = %d", ret);
885 	return ret;
886 
887 couldnt_start:
888 	mutex_unlock(&ictx->wb_lock);
889 	_leave(" = %d", ret);
890 	return ret;
891 }
892 EXPORT_SYMBOL(netfs_writeback_single);
893