// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem read subrequest result collection, assessment and
 * retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/* Notes made in the collector */
#define HIT_PENDING	0x01	/* A front op was still pending */
#define MADE_PROGRESS	0x04	/* Made progress cleaning up a stream or the folio set */
#define BUFFERED	0x08	/* The pagecache needs cleaning up */
#define NEED_RETRY	0x10	/* A front op requests retrying */
#define COPY_TO_CACHE	0x40	/* Need to copy subrequest to cache */
#define ABANDON_SREQ	0x80	/* Need to abandon untransferred part of subrequest */

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
{
	netfs_reset_iter(subreq);
	WARN_ON_ONCE(subreq->len - subreq->transferred != iov_iter_count(&subreq->io_iter));
	iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter);
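	/* Note if the subrequest reached the EOF so that the collector can
	 * unlock the final, partially-populated folio even though the full
	 * length wasn't transferred.
	 */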
	if (subreq->start + subreq->transferred >= subreq->rreq->i_size)
		__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
}

/*
 * Flush, mark and unlock a folio that's now completely read. If we want to
 * cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
 * dirty and let writeback handle it.
 */
static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
				    struct folio_queue *folioq,
				    int slot)
{
	struct netfs_folio *finfo;
	struct folio *folio = folioq_folio(folioq, slot);

	if (unlikely(folio_pos(folio) < rreq->abandon_to)) {
		trace_netfs_folio(folio, netfs_folio_trace_abandon);
		goto just_unlock;
	}

	flush_dcache_folio(folio);
	folio_mark_uptodate(folio);

	if (!test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
		finfo = netfs_folio_info(folio);
		if (finfo) {
			trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
			if (finfo->netfs_group)
				folio_change_private(folio, finfo->netfs_group);
			else
				folio_detach_private(folio);
			kfree(finfo);
		}

		if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) {
			if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
				folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
				folio_mark_dirty(folio);
			}
		} else {
			trace_netfs_folio(folio, netfs_folio_trace_read_done);
		}

		folioq_clear(folioq, slot);
	} else {
		// TODO: Use of PG_private_2 is deprecated.
		if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags))
			netfs_pgpriv2_copy_to_cache(rreq, folio);
	}

just_unlock:
	if (folio->index == rreq->no_unlock_folio &&
	    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
		_debug("no unlock");
	} else {
		trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
		folio_unlock(folio);
	}

	folioq_clear(folioq, slot);
}

/*
 * Unlock any folios we've finished with.
 */
static void netfs_read_unlock_folios(struct netfs_io_request *rreq,
				     unsigned int *notes)
{
	struct folio_queue *folioq = rreq->buffer.tail;
	unsigned long long collected_to = rreq->collected_to;
	unsigned int slot = rreq->buffer.first_tail_slot;

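	/* If nothing new has been collected since the last pass, there's
	 * nothing more to unlock yet.
	 */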
	if (rreq->cleaned_to >= rreq->collected_to)
		return;

	// TODO: Begin decryption

	if (slot >= folioq_nr_slots(folioq)) {
		folioq = rolling_buffer_delete_spent(&rreq->buffer);
		if (!folioq) {
			rreq->front_folio_order = 0;
			return;
		}
		slot = 0;
	}

	for (;;) {
		struct folio *folio;
		unsigned long long fpos, fend;
		unsigned int order;
		size_t fsize;

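		/* The subrequest being collected may want its data copying to
		 * the cache; note that on the request so that
		 * netfs_unlock_read_folio() marks the folio accordingly.
		 */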
		if (*notes & COPY_TO_CACHE)
			set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);

		folio = folioq_folio(folioq, slot);
		if (WARN_ONCE(!folio_test_locked(folio),
			      "R=%08x: folio %lx is not locked\n",
			      rreq->debug_id, folio->index))
			trace_netfs_folio(folio, netfs_folio_trace_not_locked);

		order = folioq_folio_order(folioq, slot);
		rreq->front_folio_order = order;
		fsize = PAGE_SIZE << order;
		fpos = folio_pos(folio);
		fend = umin(fpos + fsize, rreq->i_size);

		trace_netfs_collect_folio(rreq, folio, fend, collected_to);

		/* Unlock any folio we've transferred all of. */
		if (collected_to < fend)
			break;

		netfs_unlock_read_folio(rreq, folioq, slot);
		WRITE_ONCE(rreq->cleaned_to, fpos + fsize);
		*notes |= MADE_PROGRESS;

		clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);

		/* Clean up the head folioq. If we clear an entire folioq, then
		 * we can get rid of it provided it's not also the tail folioq
		 * being filled by the issuer.
		 */
		folioq_clear(folioq, slot);
		slot++;
		if (slot >= folioq_nr_slots(folioq)) {
			folioq = rolling_buffer_delete_spent(&rreq->buffer);
			if (!folioq)
				goto done;
			slot = 0;
			trace_netfs_folioq(folioq, netfs_trace_folioq_read_progress);
		}

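		/* Stop once we've unlocked everything collected so far. */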
		if (fpos + fsize >= collected_to)
			break;
	}

	rreq->buffer.tail = folioq;
done:
	rreq->buffer.first_tail_slot = slot;
}

/*
 * Collect and assess the results of various read subrequests. We may need to
 * retry some of the results.
 *
 * Note that we have a sequence of subrequests, which may be drawing on
 * different sources, may not be of the same size or start at the same
 * position, and may not even be aligned on the same boundaries.
 */
static void netfs_collect_read_results(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *front, *remove;
	struct netfs_io_stream *stream = &rreq->io_streams[0];
	unsigned int notes;

	_enter("%llx-%llx", rreq->start, rreq->start + rreq->len);
	trace_netfs_rreq(rreq, netfs_rreq_trace_collect);
	trace_netfs_collect(rreq);

reassess:
	if (rreq->origin == NETFS_READAHEAD ||
	    rreq->origin == NETFS_READPAGE ||
	    rreq->origin == NETFS_READ_FOR_WRITE)
		notes = BUFFERED;
	else
		notes = 0;

	/* Remove completed subrequests from the front of the stream and
	 * advance the completion point. We stop when we hit something that's
	 * in progress. The issuer thread may be adding stuff to the tail
	 * whilst we're doing this.
	 */
	front = READ_ONCE(stream->front);
	while (front) {
		size_t transferred;

		trace_netfs_collect_sreq(rreq, front);
		_debug("sreq [%x] %llx %zx/%zx",
		       front->debug_index, front->start, front->transferred, front->len);

		if (stream->collected_to < front->start) {
			trace_netfs_collect_gap(rreq, stream, front->start, 'F');
			stream->collected_to = front->start;
		}

		if (netfs_check_subreq_in_progress(front))
			notes |= HIT_PENDING;
		smp_rmb(); /* Read counters after IN_PROGRESS flag. */
		transferred = READ_ONCE(front->transferred);

		/* If we can now collect the next folio, do so. We don't want
		 * to defer this as we have to decide whether we need to copy
		 * to the cache or not, and that may differ between adjacent
		 * subreqs.
		 */
		if (notes & BUFFERED) {
			size_t fsize = PAGE_SIZE << rreq->front_folio_order;

			/* Clear the tail of a short read. */
			if (!(notes & HIT_PENDING) &&
			    front->error == 0 &&
			    transferred < front->len &&
			    (test_bit(NETFS_SREQ_HIT_EOF, &front->flags) ||
			     test_bit(NETFS_SREQ_CLEAR_TAIL, &front->flags))) {
				netfs_clear_unread(front);
				transferred = front->transferred = front->len;
				trace_netfs_sreq(front, netfs_sreq_trace_clear);
			}

			stream->collected_to = front->start + transferred;
			rreq->collected_to = stream->collected_to;

			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags))
				notes |= COPY_TO_CACHE;

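			/* A failed subrequest is abandoned: record how far to
			 * abandon and pretend the rest transferred so that the
			 * folios it covers still get cleaned up and unlocked.
			 */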
			if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
				rreq->abandon_to = front->start + front->len;
				front->transferred = front->len;
				transferred = front->len;
				trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon);
			}
			if (front->start + transferred >= rreq->cleaned_to + fsize ||
			    test_bit(NETFS_SREQ_HIT_EOF, &front->flags))
				netfs_read_unlock_folios(rreq, &notes);
		} else {
			stream->collected_to = front->start + transferred;
			rreq->collected_to = stream->collected_to;
		}

		/* Stall if the front is still undergoing I/O. */
		if (notes & HIT_PENDING)
			break;

		if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
			if (!stream->failed) {
				stream->error = front->error;
				rreq->error = front->error;
				set_bit(NETFS_RREQ_FAILED, &rreq->flags);
				stream->failed = true;
			}
			notes |= MADE_PROGRESS | ABANDON_SREQ;
		} else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) {
			stream->need_retry = true;
			notes |= NEED_RETRY | MADE_PROGRESS;
			break;
		} else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) {
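			/* A previous subrequest came up short, so anything this
			 * one transferred isn't contiguous with the data
			 * accounted so far and shouldn't be added to the total.
			 */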
			notes |= MADE_PROGRESS;
		} else {
			if (!stream->failed) {
				stream->transferred += transferred;
				stream->transferred_valid = true;
			}
			if (front->transferred < front->len)
				set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags);
			notes |= MADE_PROGRESS;
		}

		/* Remove if completely consumed. */
		stream->source = front->source;
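		/* The issuer thread may still be appending subrequests to the
		 * tail of the list, so edit it under the request lock.
		 */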
		spin_lock(&rreq->lock);

		remove = front;
		trace_netfs_sreq(front,
				 notes & ABANDON_SREQ ?
				 netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed);
		list_del_init(&front->rreq_link);
		front = list_first_entry_or_null(&stream->subrequests,
						 struct netfs_io_subrequest, rreq_link);
		stream->front = front;
		spin_unlock(&rreq->lock);
		netfs_put_subrequest(remove,
				     notes & ABANDON_SREQ ?
				     netfs_sreq_trace_put_abandon :
				     netfs_sreq_trace_put_done);
	}

	trace_netfs_collect_stream(rreq, stream);
	trace_netfs_collect_state(rreq, rreq->collected_to, notes);

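	/* Unbuffered and DIO reads have no pagecache folios to clean up, so
	 * the cleaned position simply tracks the collection point.
	 */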
	if (!(notes & BUFFERED))
		rreq->cleaned_to = rreq->collected_to;

	if (notes & NEED_RETRY)
		goto need_retry;
	if (notes & MADE_PROGRESS) {
		netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
		//cond_resched();
		goto reassess;
	}

out:
	_leave(" = %x", notes);
	return;

need_retry:
	/* Okay... We're going to have to retry parts of the stream. Note
	 * that any partially completed op will have had any wholly transferred
	 * folios removed from it.
	 */
	_debug("retry");
	netfs_retry_reads(rreq);
	goto out;
}

/*
 * Do page flushing and suchlike after DIO.
 */
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
	unsigned int i;

	if (rreq->origin == NETFS_UNBUFFERED_READ ||
	    rreq->origin == NETFS_DIO_READ) {
		for (i = 0; i < rreq->direct_bv_count; i++) {
			flush_dcache_page(rreq->direct_bv[i].bv_page);
			// TODO: cifs marks pages in the destination buffer
			// dirty under some circumstances after a read. Do we
			// need to do that too?
			set_page_dirty(rreq->direct_bv[i].bv_page);
		}
	}

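	/* Advance the caller's file position by the amount transferred and,
	 * for async I/O, invoke the iocb completion.
	 */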
	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete) {
			trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
		}
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
	if (rreq->origin == NETFS_UNBUFFERED_READ ||
	    rreq->origin == NETFS_DIO_READ)
		inode_dio_end(rreq->inode);
}

/*
 * Do processing after reading a monolithic single object.
 */
static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

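	/* If the content was downloaded from the server and we have cache
	 * resources, mark the inode dirty so that writeback will store the
	 * object in the cache.
	 */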
	if (!rreq->error && stream->source == NETFS_DOWNLOAD_FROM_SERVER &&
	    fscache_resources_valid(&rreq->cache_resources)) {
		trace_netfs_rreq(rreq, netfs_rreq_trace_dirty);
		netfs_single_mark_inode_dirty(rreq->inode);
	}

	if (rreq->iocb) {
		rreq->iocb->ki_pos += rreq->transferred;
		if (rreq->iocb->ki_complete) {
			trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
			rreq->iocb->ki_complete(
				rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
		}
	}
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

/*
 * Perform the collection of subrequests and folios.
 *
 * Note that we're in normal kernel thread context at this point, possibly
 * running on a workqueue.
 */
bool netfs_read_collection(struct netfs_io_request *rreq)
{
	struct netfs_io_stream *stream = &rreq->io_streams[0];

	netfs_collect_read_results(rreq);

	/* We're done when the app thread has finished posting subreqs and the
	 * queue is empty.
	 */
	if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
		return false;
	smp_rmb(); /* Read ALL_QUEUED before subreq lists. */

	if (!list_empty(&stream->subrequests))
		return false;

	/* Okay, declare that all I/O is complete. */
	rreq->transferred = stream->transferred;
	trace_netfs_rreq(rreq, netfs_rreq_trace_complete);

	//netfs_rreq_is_still_valid(rreq);

	switch (rreq->origin) {
	case NETFS_UNBUFFERED_READ:
	case NETFS_DIO_READ:
	case NETFS_READ_GAPS:
		netfs_rreq_assess_dio(rreq);
		break;
	case NETFS_READ_SINGLE:
		netfs_rreq_assess_single(rreq);
		break;
	default:
		break;
	}
	task_io_account_read(rreq->transferred);

	netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
	/* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */

	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_clear_subrequests(rreq);
	netfs_unlock_abandoned_read_pages(rreq);
	if (unlikely(rreq->copy_to_cache))
		netfs_pgpriv2_end_copy_to_cache(rreq);
	return true;
}

void netfs_read_collection_worker(struct work_struct *work)
{
	struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);

	netfs_see_request(rreq, netfs_rreq_trace_see_work);
	if (netfs_check_rreq_in_progress(rreq)) {
		if (netfs_read_collection(rreq))
			/* Drop the ref from the IN_PROGRESS flag. */
			netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
		else
			netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
	}
}

/**
 * netfs_read_subreq_progress - Note progress of a read operation.
 * @subreq: The read subrequest that has made progress.
 *
 * This tells the read side of the netfs library that a contributory I/O
 * operation has made some progress and that it may be possible to unlock some
 * folios.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 */
void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;
	struct netfs_io_stream *stream = &rreq->io_streams[0];
	size_t fsize = PAGE_SIZE << rreq->front_folio_order;

	trace_netfs_sreq(subreq, netfs_sreq_trace_progress);

	/* If we are at the head of the queue, wake up the collector, getting a
	 * ref to it if we were the ones to do so.  Only bother once at least a
	 * folio's worth of data has accrued beyond what has already been
	 * cleaned up.
	 */
	if (subreq->start + subreq->transferred > rreq->cleaned_to + fsize &&
	    (rreq->origin == NETFS_READAHEAD ||
	     rreq->origin == NETFS_READPAGE ||
	     rreq->origin == NETFS_READ_FOR_WRITE) &&
	    list_is_first(&subreq->rreq_link, &stream->subrequests)
	    ) {
		__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
		netfs_wake_collector(rreq);
	}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);

/**
 * netfs_read_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O subrequest that has terminated.
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates the outcome of the operation through @subreq->error,
 * supplying 0 to indicate a successful or retryable transfer (if
 * NETFS_SREQ_NEED_RETRY is set) or a negative error code. The helper will
 * look after reissuing I/O operations as appropriate and writing downloaded
 * data to the cache.
 *
 * Before calling, the filesystem should update subreq->transferred to track
 * the amount of data copied into the output buffer.
 */
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *rreq = subreq->rreq;

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	/* Deal with retry requests, short reads and errors. If we retry
	 * but don't make progress, we abandon the attempt.
	 */
	if (!subreq->error && subreq->transferred < subreq->len) {
		if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
		} else if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_need_clear);
		} else if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
			trace_netfs_sreq(subreq, netfs_sreq_trace_need_retry);
		} else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
			__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
			trace_netfs_sreq(subreq, netfs_sreq_trace_partial_read);
		} else {
			__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
			subreq->error = -ENODATA;
			trace_netfs_sreq(subreq, netfs_sreq_trace_short);
		}
	}

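	/* A failed read from the cache is marked for retry (which will fall
	 * back to the server), whereas a failed download fails the subrequest
	 * outright.  Either way, pause the issuing of further subrequests
	 * until the collector has assessed the situation.
	 */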
	if (unlikely(subreq->error < 0)) {
		trace_netfs_failure(rreq, subreq, subreq->error, netfs_fail_read);
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			netfs_stat(&netfs_n_rh_read_failed);
			__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
		} else {
			netfs_stat(&netfs_n_rh_download_failed);
			__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
		}
		trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause);
		set_bit(NETFS_RREQ_PAUSE, &rreq->flags);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
	netfs_subreq_clear_in_progress(subreq);
	netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);

/*
 * Handle termination of a read from the cache.
 */
void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error)
{
	struct netfs_io_subrequest *subreq = priv;

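	/* The cache backend passes in either the number of bytes transferred
	 * or a negative error code.
	 */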
	if (transferred_or_error >= 0) {
		subreq->error = 0;
		if (transferred_or_error > 0) {
			subreq->transferred += transferred_or_error;
			__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
		}
	} else {
		subreq->error = transferred_or_error;
	}
	netfs_read_subreq_terminated(subreq);
}