xref: /linux/fs/netfs/write_collect.c (revision 56d8b784c56588cd40f98e4b1d4f6e29e3cb02b8)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Network filesystem write subrequest result collection, assessment
3  * and retrying.
4  *
5  * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
6  * Written by David Howells (dhowells@redhat.com)
7  */
8 
9 #include <linux/export.h>
10 #include <linux/fs.h>
11 #include <linux/mm.h>
12 #include <linux/pagemap.h>
13 #include <linux/slab.h>
14 #include "internal.h"
15 
16 /* Notes made in the collector */
17 #define HIT_PENDING		0x01	/* A front op was still pending */
18 #define SOME_EMPTY		0x02	/* One or more streams are empty */
19 #define ALL_EMPTY		0x04	/* All streams are empty */
20 #define MAYBE_DISCONTIG		0x08	/* A front op may be discontiguous (rounded to PAGE_SIZE) */
21 #define NEED_REASSESS		0x10	/* Need to loop round and reassess */
22 #define REASSESS_DISCONTIG	0x20	/* Reassess discontiguity if contiguity advances */
23 #define MADE_PROGRESS		0x40	/* Made progress cleaning up a stream or the folio set */
24 #define BUFFERED		0x80	/* The pagecache needs cleaning up */
25 #define NEED_RETRY		0x100	/* A front op requests retrying */
26 #define SAW_FAILURE		0x200	/* One or more streams hit a permanent failure */
27 
28 /*
29  * Successful completion of write of a folio to the server and/or cache.  Note
30  * that we are not allowed to lock the folio here on pain of deadlocking with
31  * truncate.
32  */
33 int netfs_folio_written_back(struct folio *folio)
34 {
35 	enum netfs_folio_trace why = netfs_folio_trace_clear;
36 	struct netfs_inode *ictx = netfs_inode(folio->mapping->host);
37 	struct netfs_folio *finfo;
38 	struct netfs_group *group = NULL;
39 	int gcount = 0;
40 
41 	if ((finfo = netfs_folio_info(folio))) {
42 		/* Streaming writes cannot be redirtied whilst under writeback,
43 		 * so discard the streaming record.
44 		 */
45 		unsigned long long fend;
46 
47 		fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len;
48 		if (fend > ictx->zero_point)
49 			ictx->zero_point = fend;
50 
51 		folio_detach_private(folio);
52 		group = finfo->netfs_group;
53 		gcount++;
54 		kfree(finfo);
55 		why = netfs_folio_trace_clear_s;
56 		goto end_wb;
57 	}
58 
59 	if ((group = netfs_folio_group(folio))) {
60 		if (group == NETFS_FOLIO_COPY_TO_CACHE) {
61 			why = netfs_folio_trace_clear_cc;
62 			folio_detach_private(folio);
63 			goto end_wb;
64 		}
65 
66 		/* Need to detach the group pointer if the page didn't get
67 		 * redirtied.  If it has been redirtied, then it must be within
68 		 * the same group.
69 		 */
70 		why = netfs_folio_trace_redirtied;
71 		if (!folio_test_dirty(folio)) {
72 			folio_detach_private(folio);
73 			gcount++;
74 			why = netfs_folio_trace_clear_g;
75 		}
76 	}
77 
78 end_wb:
79 	trace_netfs_folio(folio, why);
80 	folio_end_writeback(folio);
81 	return gcount;
82 }
83 
84 /*
85  * Get hold of a folio we have under writeback.  We don't want to take a
86  * ref on it.
87  */
88 static struct folio *netfs_writeback_lookup_folio(struct netfs_io_request *wreq, loff_t pos)
89 {
90 	XA_STATE(xas, &wreq->mapping->i_pages, pos / PAGE_SIZE);
91 	struct folio *folio;
92 
93 	rcu_read_lock();
94 
95 	for (;;) {
96 		xas_reset(&xas);
97 		folio = xas_load(&xas);
98 		if (xas_retry(&xas, folio))
99 			continue;
100 
101 		if (!folio || xa_is_value(folio))
102 			kdebug("R=%08x: folio %lx (%llx) not present",
103 			       wreq->debug_id, xas.xa_index, pos / PAGE_SIZE);
104 		BUG_ON(!folio || xa_is_value(folio));
105 
106 		if (folio == xas_reload(&xas))
107 			break;
108 	}
109 
110 	rcu_read_unlock();
111 
112 	if (WARN_ONCE(!folio_test_writeback(folio),
113 		      "R=%08x: folio %lx is not under writeback\n",
114 		      wreq->debug_id, folio->index)) {
115 		trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);
116 	}
117 	return folio;
118 }
119 
120 /*
121  * Unlock any folios we've finished with.
122  */
123 static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
124 					  unsigned long long collected_to,
125 					  unsigned int *notes)
126 {
127 	for (;;) {
128 		struct folio *folio;
129 		struct netfs_folio *finfo;
130 		unsigned long long fpos, fend;
131 		size_t fsize, flen;
132 
133 		folio = netfs_writeback_lookup_folio(wreq, wreq->cleaned_to);
134 
135 		fpos = folio_pos(folio);
136 		fsize = folio_size(folio);
137 		finfo = netfs_folio_info(folio);
138 		flen = finfo ? finfo->dirty_offset + finfo->dirty_len : fsize;
139 
140 		fend = min_t(unsigned long long, fpos + flen, wreq->i_size);
141 
142 		trace_netfs_collect_folio(wreq, folio, fend, collected_to);
143 
144 		if (fpos + fsize > wreq->contiguity) {
145 			trace_netfs_collect_contig(wreq, fpos + fsize,
146 						   netfs_contig_trace_unlock);
147 			wreq->contiguity = fpos + fsize;
148 		}
149 
150 		/* Unlock any folio we've transferred all of. */
151 		if (collected_to < fend)
152 			break;
153 
154 		wreq->nr_group_rel += netfs_folio_written_back(folio);
155 		wreq->cleaned_to = fpos + fsize;
156 		*notes |= MADE_PROGRESS;
157 
158 		if (fpos + fsize >= collected_to)
159 			break;
160 	}
161 }
162 
163 /*
164  * Perform retries on the streams that need it.
165  */
166 static void netfs_retry_write_stream(struct netfs_io_request *wreq,
167 				     struct netfs_io_stream *stream)
168 {
169 	struct list_head *next;
170 
171 	_enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
172 
173 	if (list_empty(&stream->subrequests))
174 		return;
175 
176 	if (stream->source == NETFS_UPLOAD_TO_SERVER &&
177 	    wreq->netfs_ops->retry_request)
178 		wreq->netfs_ops->retry_request(wreq, stream);
179 
180 	if (unlikely(stream->failed))
181 		return;
182 
183 	/* If there's no renegotiation to do, just resend each failed subreq. */
184 	if (!stream->prepare_write) {
185 		struct netfs_io_subrequest *subreq;
186 
187 		list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
188 			if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
189 				break;
190 			if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
191 				__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
192 				netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
193 				netfs_reissue_write(stream, subreq);
194 			}
195 		}
196 		return;
197 	}
198 
199 	next = stream->subrequests.next;
200 
201 	do {
202 		struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
203 		unsigned long long start, len;
204 		size_t part;
205 		bool boundary = false;
206 
207 		/* Go through the stream and find the next span of contiguous
208 		 * data that we then rejig (cifs, for example, needs the wsize
209 		 * renegotiating) and reissue.
210 		 */
211 		from = list_entry(next, struct netfs_io_subrequest, rreq_link);
212 		to = from;
213 		start = from->start + from->transferred;
214 		len   = from->len   - from->transferred;
215 
216 		if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
217 		    !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
218 			return;
219 
220 		list_for_each_continue(next, &stream->subrequests) {
221 			subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
222 			if (subreq->start + subreq->transferred != start + len ||
223 			    test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
224 			    !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
225 				break;
226 			to = subreq;
227 			len += to->len;
228 		}
229 
230 		/* Work through the sublist. */
231 		subreq = from;
232 		list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
233 			if (!len)
234 				break;
235 			/* Renegotiate max_len (wsize) */
236 			trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
237 			__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
238 			__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
239 			stream->prepare_write(subreq);
240 
241 			part = min(len, subreq->max_len);
242 			subreq->len = part;
243 			subreq->start = start;
244 			subreq->transferred = 0;
245 			len -= part;
246 			start += part;
247 			if (len && subreq == to &&
248 			    __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
249 				boundary = true;
250 
251 			netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
252 			netfs_reissue_write(stream, subreq);
253 			if (subreq == to)
254 				break;
255 		}
256 
257 		/* If we managed to use fewer subreqs, we can discard the
258 		 * excess; if we used the same number, then we're done.
259 		 */
260 		if (!len) {
261 			if (subreq == to)
262 				continue;
263 			list_for_each_entry_safe_from(subreq, tmp,
264 						      &stream->subrequests, rreq_link) {
265 				trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
266 				list_del(&subreq->rreq_link);
267 				netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
268 				if (subreq == to)
269 					break;
270 			}
271 			continue;
272 		}
273 
274 		/* We ran out of subrequests, so we need to allocate some more
275 		 * and insert them after the last one we used.
276 		 */
277 		do {
278 			subreq = netfs_alloc_subrequest(wreq);
279 			subreq->source		= to->source;
280 			subreq->start		= start;
281 			subreq->max_len		= len;
282 			subreq->max_nr_segs	= INT_MAX;
283 			subreq->debug_index	= atomic_inc_return(&wreq->subreq_counter);
284 			subreq->stream_nr	= to->stream_nr;
285 			__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
286 
287 			trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
288 					     refcount_read(&subreq->ref),
289 					     netfs_sreq_trace_new);
290 			netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
291 
292 			list_add(&subreq->rreq_link, &to->rreq_link);
293 			to = list_next_entry(to, rreq_link);
294 			trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
295 
296 			switch (stream->source) {
297 			case NETFS_UPLOAD_TO_SERVER:
298 				netfs_stat(&netfs_n_wh_upload);
299 				subreq->max_len = min(len, wreq->wsize);
300 				break;
301 			case NETFS_WRITE_TO_CACHE:
302 				netfs_stat(&netfs_n_wh_write);
303 				break;
304 			default:
305 				WARN_ON_ONCE(1);
306 			}
307 
308 			stream->prepare_write(subreq);
309 
310 			part = min(len, subreq->max_len);
311 			subreq->len = subreq->transferred + part;
312 			len -= part;
313 			start += part;
314 			if (!len && boundary) {
315 				__set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
316 				boundary = false;
317 			}
318 
319 			netfs_reissue_write(stream, subreq);
320 			if (!len)
321 				break;
322 
323 		} while (len);
324 
325 	} while (!list_is_head(next, &stream->subrequests));
326 }
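
/*
 * Worked example for the span rejigging above (sizes purely illustrative):
 * if three failed subrequests cover one contiguous 48KiB span and the wsize
 * is renegotiated down to 8KiB, the three existing subrequests are reissued
 * to cover the first 24KiB and three more are allocated and inserted to
 * cover the remainder; if instead the renegotiated wsize covers the whole
 * span, the first subrequest is reissued for all 48KiB and the other two
 * are discarded.
 */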
327 
328 /*
329  * Perform retries on the streams that need it.  If we're doing content
330  * encryption and the server copy changed due to a third-party write, we may
331  * need to do an RMW cycle and also rewrite the data to the cache.
332  */
333 static void netfs_retry_writes(struct netfs_io_request *wreq)
334 {
335 	struct netfs_io_subrequest *subreq;
336 	struct netfs_io_stream *stream;
337 	int s;
338 
339 	/* Wait for all outstanding I/O to quiesce before performing retries as
340 	 * we may need to renegotiate the I/O sizes.
341 	 */
342 	for (s = 0; s < NR_IO_STREAMS; s++) {
343 		stream = &wreq->io_streams[s];
344 		if (!stream->active)
345 			continue;
346 
347 		list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
348 			wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
349 				    TASK_UNINTERRUPTIBLE);
350 		}
351 	}
352 
353 	// TODO: Enc: Fetch changed partial pages
354 	// TODO: Enc: Reencrypt content if needed.
355 	// TODO: Enc: Wind back transferred point.
356 	// TODO: Enc: Mark cache pages for retry.
357 
358 	for (s = 0; s < NR_IO_STREAMS; s++) {
359 		stream = &wreq->io_streams[s];
360 		if (stream->need_retry) {
361 			stream->need_retry = false;
362 			netfs_retry_write_stream(wreq, stream);
363 		}
364 	}
365 }
366 
367 /*
368  * Collect and assess the results of various write subrequests.  We may need to
369  * retry some of the results - or even do an RMW cycle for content crypto.
370  *
371  * Note that we have a number of parallel, overlapping lists of subrequests,
372  * one to the server and one to the local cache for example, which may not be
373  * the same size or starting position and may not even correspond in boundary
374  * alignment.
375  */
376 static void netfs_collect_write_results(struct netfs_io_request *wreq)
377 {
378 	struct netfs_io_subrequest *front, *remove;
379 	struct netfs_io_stream *stream;
380 	unsigned long long collected_to;
381 	unsigned int notes;
382 	int s;
383 
384 	_enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
385 	trace_netfs_collect(wreq);
386 	trace_netfs_rreq(wreq, netfs_rreq_trace_collect);
387 
388 reassess_streams:
389 	smp_rmb();
390 	collected_to = ULLONG_MAX;
391 	if (wreq->origin == NETFS_WRITEBACK)
392 		notes = ALL_EMPTY | BUFFERED | MAYBE_DISCONTIG;
393 	else if (wreq->origin == NETFS_WRITETHROUGH)
394 		notes = ALL_EMPTY | BUFFERED;
395 	else
396 		notes = ALL_EMPTY;
397 
398 	/* Remove completed subrequests from the front of the streams and
399 	 * advance the completion point on each stream.  We stop when we hit
400 	 * something that's in progress.  The issuer thread may be adding stuff
401 	 * to the tail whilst we're doing this.
402 	 *
403 	 * We must not, however, merge in discontiguities that span whole
404 	 * folios that aren't under writeback.  This is made more complicated
405 	 * by the folios in the gap being of unpredictable sizes - if they even
406 	 * exist - but we don't want to look them up.
407 	 */
408 	for (s = 0; s < NR_IO_STREAMS; s++) {
409 		loff_t rstart, rend;
410 
411 		stream = &wreq->io_streams[s];
412 		/* Read active flag before list pointers */
413 		if (!smp_load_acquire(&stream->active))
414 			continue;
415 
416 		front = stream->front;
417 		while (front) {
418 			trace_netfs_collect_sreq(wreq, front);
419 			//_debug("sreq [%x] %llx %zx/%zx",
420 			//       front->debug_index, front->start, front->transferred, front->len);
421 
422 			/* Stall if there may be a discontinuity. */
423 			rstart = round_down(front->start, PAGE_SIZE);
424 			if (rstart > wreq->contiguity) {
425 				if (wreq->contiguity > stream->collected_to) {
426 					trace_netfs_collect_gap(wreq, stream,
427 								wreq->contiguity, 'D');
428 					stream->collected_to = wreq->contiguity;
429 				}
430 				notes |= REASSESS_DISCONTIG;
431 				break;
432 			}
433 			rend = round_up(front->start + front->len, PAGE_SIZE);
434 			if (rend > wreq->contiguity) {
435 				trace_netfs_collect_contig(wreq, rend,
436 							   netfs_contig_trace_collect);
437 				wreq->contiguity = rend;
438 				if (notes & REASSESS_DISCONTIG)
439 					notes |= NEED_REASSESS;
440 			}
441 			notes &= ~MAYBE_DISCONTIG;
442 
443 			/* Stall if the front is still undergoing I/O. */
444 			if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) {
445 				notes |= HIT_PENDING;
446 				break;
447 			}
448 			smp_rmb(); /* Read counters after I-P flag. */
449 
450 			if (stream->failed) {
451 				stream->collected_to = front->start + front->len;
452 				notes |= MADE_PROGRESS | SAW_FAILURE;
453 				goto cancel;
454 			}
455 			if (front->start + front->transferred > stream->collected_to) {
456 				stream->collected_to = front->start + front->transferred;
457 				stream->transferred = stream->collected_to - wreq->start;
458 				notes |= MADE_PROGRESS;
459 			}
460 			if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
461 				stream->failed = true;
462 				stream->error = front->error;
463 				if (stream->source == NETFS_UPLOAD_TO_SERVER)
464 					mapping_set_error(wreq->mapping, front->error);
465 				notes |= NEED_REASSESS | SAW_FAILURE;
466 				break;
467 			}
468 			if (front->transferred < front->len) {
469 				stream->need_retry = true;
470 				notes |= NEED_RETRY | MADE_PROGRESS;
471 				break;
472 			}
473 
474 		cancel:
475 			/* Remove if completely consumed. */
476 			spin_lock(&wreq->lock);
477 
478 			remove = front;
479 			list_del_init(&front->rreq_link);
480 			front = list_first_entry_or_null(&stream->subrequests,
481 							 struct netfs_io_subrequest, rreq_link);
482 			stream->front = front;
483 			if (!front) {
484 				unsigned long long jump_to = atomic64_read(&wreq->issued_to);
485 
486 				if (stream->collected_to < jump_to) {
487 					trace_netfs_collect_gap(wreq, stream, jump_to, 'A');
488 					stream->collected_to = jump_to;
489 				}
490 			}
491 
492 			spin_unlock(&wreq->lock);
493 			netfs_put_subrequest(remove, false,
494 					     notes & SAW_FAILURE ?
495 					     netfs_sreq_trace_put_cancel :
496 					     netfs_sreq_trace_put_done);
497 		}
498 
499 		if (front)
500 			notes &= ~ALL_EMPTY;
501 		else
502 			notes |= SOME_EMPTY;
503 
504 		if (stream->collected_to < collected_to)
505 			collected_to = stream->collected_to;
506 	}
507 
508 	if (collected_to != ULLONG_MAX && collected_to > wreq->collected_to)
509 		wreq->collected_to = collected_to;
510 
511 	/* If we have an empty stream, we need to jump it forward over any gap,
512 	 * otherwise the collection point will never advance.
513 	 *
514 	 * Note that the issuer always adds to the stream with the lowest
515 	 * so-far submitted start, so if we see two consecutive subreqs in one
516 	 * stream with nothing between them in another stream, then the second
517 	 * stream has a gap that can be jumped.
518 	 */
519 	if (notes & SOME_EMPTY) {
520 		unsigned long long jump_to = wreq->start + READ_ONCE(wreq->submitted);
521 
522 		for (s = 0; s < NR_IO_STREAMS; s++) {
523 			stream = &wreq->io_streams[s];
524 			if (stream->active &&
525 			    stream->front &&
526 			    stream->front->start < jump_to)
527 				jump_to = stream->front->start;
528 		}
529 
530 		for (s = 0; s < NR_IO_STREAMS; s++) {
531 			stream = &wreq->io_streams[s];
532 			if (stream->active &&
533 			    !stream->front &&
534 			    stream->collected_to < jump_to) {
535 				trace_netfs_collect_gap(wreq, stream, jump_to, 'B');
536 				stream->collected_to = jump_to;
537 			}
538 		}
539 	}
540 
541 	for (s = 0; s < NR_IO_STREAMS; s++) {
542 		stream = &wreq->io_streams[s];
543 		if (stream->active)
544 			trace_netfs_collect_stream(wreq, stream);
545 	}
546 
547 	trace_netfs_collect_state(wreq, wreq->collected_to, notes);
548 
549 	/* Unlock any folios that we have now finished with. */
550 	if (notes & BUFFERED) {
551 		unsigned long long clean_to = min(wreq->collected_to, wreq->contiguity);
552 
553 		if (wreq->cleaned_to < clean_to)
554 			netfs_writeback_unlock_folios(wreq, clean_to, &notes);
555 	} else {
556 		wreq->cleaned_to = wreq->collected_to;
557 	}
558 
559 	// TODO: Discard encryption buffers
560 
561 	/* If all streams are discontiguous with the last folio we cleared, we
562 	 * may need to skip a set of folios.
563 	 */
564 	if ((notes & (MAYBE_DISCONTIG | ALL_EMPTY)) == MAYBE_DISCONTIG) {
565 		unsigned long long jump_to = ULLONG_MAX;
566 
567 		for (s = 0; s < NR_IO_STREAMS; s++) {
568 			stream = &wreq->io_streams[s];
569 			if (stream->active && stream->front &&
570 			    stream->front->start < jump_to)
571 				jump_to = stream->front->start;
572 		}
573 
574 		trace_netfs_collect_contig(wreq, jump_to, netfs_contig_trace_jump);
575 		wreq->contiguity = jump_to;
576 		wreq->cleaned_to = jump_to;
577 		wreq->collected_to = jump_to;
578 		for (s = 0; s < NR_IO_STREAMS; s++) {
579 			stream = &wreq->io_streams[s];
580 			if (stream->collected_to < jump_to)
581 				stream->collected_to = jump_to;
582 		}
583 		//cond_resched();
584 		notes |= MADE_PROGRESS;
585 		goto reassess_streams;
586 	}
587 
588 	if (notes & NEED_RETRY)
589 		goto need_retry;
590 	if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
591 		trace_netfs_rreq(wreq, netfs_rreq_trace_unpause);
592 		clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags);
593 		wake_up_bit(&wreq->flags, NETFS_RREQ_PAUSE);
594 	}
595 
596 	if (notes & NEED_REASSESS) {
597 		//cond_resched();
598 		goto reassess_streams;
599 	}
600 	if (notes & MADE_PROGRESS) {
601 		//cond_resched();
602 		goto reassess_streams;
603 	}
604 
605 out:
606 	netfs_put_group_many(wreq->group, wreq->nr_group_rel);
607 	wreq->nr_group_rel = 0;
608 	_leave(" = %x", notes);
609 	return;
610 
611 need_retry:
612 	/* Okay...  We're going to have to retry one or both streams.  Note
613 	 * that any partially completed op will have had any wholly transferred
614 	 * folios removed from it.
615 	 */
616 	_debug("retry");
617 	netfs_retry_writes(wreq);
618 	goto out;
619 }
620 
621 /*
622  * Perform the collection of subrequests, folios and encryption buffers.
623  */
624 void netfs_write_collection_worker(struct work_struct *work)
625 {
626 	struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work);
627 	struct netfs_inode *ictx = netfs_inode(wreq->inode);
628 	size_t transferred;
629 	int s;
630 
631 	_enter("R=%x", wreq->debug_id);
632 
633 	netfs_see_request(wreq, netfs_rreq_trace_see_work);
634 	if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
635 		netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
636 		return;
637 	}
638 
639 	netfs_collect_write_results(wreq);
640 
641 	/* We're done when the app thread has finished posting subreqs and all
642 	 * the queues in all the streams are empty.
643 	 */
644 	if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) {
645 		netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
646 		return;
647 	}
648 	smp_rmb(); /* Read ALL_QUEUED before lists. */
649 
650 	transferred = LONG_MAX;
651 	for (s = 0; s < NR_IO_STREAMS; s++) {
652 		struct netfs_io_stream *stream = &wreq->io_streams[s];
653 		if (!stream->active)
654 			continue;
655 		if (!list_empty(&stream->subrequests)) {
656 			netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
657 			return;
658 		}
659 		if (stream->transferred < transferred)
660 			transferred = stream->transferred;
661 	}
662 
663 	/* Okay, declare that all I/O is complete. */
664 	wreq->transferred = transferred;
665 	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
666 
667 	if (wreq->io_streams[1].active &&
668 	    wreq->io_streams[1].failed) {
669 		/* Cache write failure doesn't prevent writeback completion
670 		 * unless we're in disconnected mode.
671 		 */
672 		ictx->ops->invalidate_cache(wreq);
673 	}
674 
675 	if (wreq->cleanup)
676 		wreq->cleanup(wreq);
677 
678 	if (wreq->origin == NETFS_DIO_WRITE &&
679 	    wreq->mapping->nrpages) {
680 		/* mmap may have got underfoot and we may now have folios
681 		 * locally covering the region we just wrote.  Attempt to
682 		 * discard the folios, but leave in place any that were modified locally.
683 		 * ->write_iter() is prevented from interfering by the DIO
684 		 * counter.
685 		 */
686 		pgoff_t first = wreq->start >> PAGE_SHIFT;
687 		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
688 		invalidate_inode_pages2_range(wreq->mapping, first, last);
689 	}
690 
691 	if (wreq->origin == NETFS_DIO_WRITE)
692 		inode_dio_end(wreq->inode);
693 
694 	_debug("finished");
695 	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
696 	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
697 	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
698 
699 	if (wreq->iocb) {
700 		size_t written = min(wreq->transferred, wreq->len);
701 		wreq->iocb->ki_pos += written;
702 		if (wreq->iocb->ki_complete)
703 			wreq->iocb->ki_complete(
704 				wreq->iocb, wreq->error ? wreq->error : written);
705 		wreq->iocb = VFS_PTR_POISON;
706 	}
707 
708 	netfs_clear_subrequests(wreq, false);
709 	netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete);
710 }
711 
712 /*
713  * Wake the collection work item.
714  */
715 void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
716 {
717 	if (!work_pending(&wreq->work)) {
718 		netfs_get_request(wreq, netfs_rreq_trace_get_work);
719 		if (!queue_work(system_unbound_wq, &wreq->work))
720 			netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq);
721 	}
722 }
723 
724 /**
725  * netfs_write_subrequest_terminated - Note the termination of a write operation.
726  * @_op: The I/O subrequest that has terminated.
727  * @transferred_or_error: The amount of data transferred or an error code.
728  * @was_async: The termination was asynchronous
729  *
730  * This tells the library that a contributory write I/O operation has
731  * terminated, one way or another, and that it should collect the results.
732  *
733  * The caller indicates in @transferred_or_error the outcome of the operation,
734  * supplying a positive value to indicate the number of bytes transferred or a
735  * negative error code.  The library will look after reissuing I/O operations
736  * as appropriate and writing the data to the cache.
737  *
738  * If @was_async is true, the caller might be running in softirq or interrupt
739  * context and we can't sleep.
740  *
741  * When this is called, ownership of the subrequest is transferred back to the
742  * library, along with a ref.
743  *
744  * Note that %_op is a void* so that the function can be passed to
745  * kiocb::term_func without the need for a casting wrapper.
746  */
747 void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
748 				       bool was_async)
749 {
750 	struct netfs_io_subrequest *subreq = _op;
751 	struct netfs_io_request *wreq = subreq->rreq;
752 	struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
753 
754 	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
755 
756 	switch (subreq->source) {
757 	case NETFS_UPLOAD_TO_SERVER:
758 		netfs_stat(&netfs_n_wh_upload_done);
759 		break;
760 	case NETFS_WRITE_TO_CACHE:
761 		netfs_stat(&netfs_n_wh_write_done);
762 		break;
763 	case NETFS_INVALID_WRITE:
764 		break;
765 	default:
766 		BUG();
767 	}
768 
769 	if (IS_ERR_VALUE(transferred_or_error)) {
770 		subreq->error = transferred_or_error;
771 		if (subreq->error == -EAGAIN)
772 			set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
773 		else
774 			set_bit(NETFS_SREQ_FAILED, &subreq->flags);
775 		trace_netfs_failure(wreq, subreq, transferred_or_error, netfs_fail_write);
776 
777 		switch (subreq->source) {
778 		case NETFS_WRITE_TO_CACHE:
779 			netfs_stat(&netfs_n_wh_write_failed);
780 			break;
781 		case NETFS_UPLOAD_TO_SERVER:
782 			netfs_stat(&netfs_n_wh_upload_failed);
783 			break;
784 		default:
785 			break;
786 		}
787 		trace_netfs_rreq(wreq, netfs_rreq_trace_set_pause);
788 		set_bit(NETFS_RREQ_PAUSE, &wreq->flags);
789 	} else {
790 		if (WARN(transferred_or_error > subreq->len - subreq->transferred,
791 			 "Subreq excess write: R=%x[%x] %zd > %zu - %zu",
792 			 wreq->debug_id, subreq->debug_index,
793 			 transferred_or_error, subreq->len, subreq->transferred))
794 			transferred_or_error = subreq->len - subreq->transferred;
795 
796 		subreq->error = 0;
797 		subreq->transferred += transferred_or_error;
798 
799 		if (subreq->transferred < subreq->len)
800 			set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
801 	}
802 
803 	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
804 
805 	clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
806 	wake_up_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS);
807 
808 	/* If we are at the head of the queue, wake up the collector,
809 	 * transferring a ref to it if we were the ones to do so.
810 	 */
811 	if (list_is_first(&subreq->rreq_link, &stream->subrequests))
812 		netfs_wake_write_collector(wreq, was_async);
813 
814 	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
815 }
816 EXPORT_SYMBOL(netfs_write_subrequest_terminated);
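
/*
 * Illustrative sketch, not part of the original file: one way a network
 * filesystem's own write-completion path might hand its result back to
 * netfslib.  The myfs_* names and structure are hypothetical; only the call
 * to netfs_write_subrequest_terminated() reflects the API documented above.
 */
struct myfs_write_call {
	struct netfs_io_subrequest *subreq;	/* Saved when the write was issued */
	ssize_t bytes_written;			/* Bytes the server acknowledged */
	int error;				/* 0 or a negative errno */
};

static void myfs_write_done(struct myfs_write_call *call, bool was_async)
{
	/* Hand back either the byte count or a negative error code;
	 * netfslib then decides whether to collect, retry or fail the
	 * subrequest.  @was_async should be true if this runs in softirq
	 * or interrupt context.
	 */
	ssize_t result = call->error ? call->error : call->bytes_written;

	netfs_write_subrequest_terminated(call->subreq, result, was_async);
}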
817